From 006e0f105455c039d85c8a75bc0faa9570ee6a26 Mon Sep 17 00:00:00 2001
From: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
Date: Fri, 25 Aug 2023 23:36:30 +0200
Subject: [PATCH 01/53] Split off and wrap cloning API
---
ext/dom/document.c | 8 +-------
ext/dom/node.c | 30 +-----------------------------
ext/dom/php_dom.c | 10 ++++++++++
ext/dom/php_dom.h | 2 ++
4 files changed, 14 insertions(+), 36 deletions(-)
diff --git a/ext/dom/document.c b/ext/dom/document.c
index 31b889125269b..2aebbccca719d 100644
--- a/ext/dom/document.c
+++ b/ext/dom/document.c
@@ -776,8 +776,6 @@ PHP_METHOD(DOMDocument, importNode)
dom_object *intern, *nodeobj;
int ret;
bool recursive = 0;
- /* See http://www.xmlsoft.org/html/libxml-tree.html#xmlDocCopyNode for meaning of values */
- int extended_recursive;
if (zend_parse_parameters(ZEND_NUM_ARGS(), "O|b", &node, dom_node_class_entry, &recursive) == FAILURE) {
RETURN_THROWS();
@@ -796,11 +794,7 @@ PHP_METHOD(DOMDocument, importNode)
if (nodep->doc == docp) {
retnodep = nodep;
} else {
- extended_recursive = recursive;
- if ((recursive == 0) && (nodep->type == XML_ELEMENT_NODE)) {
- extended_recursive = 2;
- }
- retnodep = xmlDocCopyNode(nodep, docp, extended_recursive);
+ retnodep = dom_clone_node(nodep, docp, recursive);
if (!retnodep) {
RETURN_FALSE;
}
diff --git a/ext/dom/node.c b/ext/dom/node.c
index 5719bdde9999d..cd62565df884d 100644
--- a/ext/dom/node.c
+++ b/ext/dom/node.c
@@ -1329,40 +1329,12 @@ PHP_METHOD(DOMNode, cloneNode)
DOM_GET_OBJ(n, id, xmlNodePtr, intern);
- node = xmlDocCopyNode(n, n->doc, recursive);
+ node = dom_clone_node(n, n->doc, recursive);
if (!node) {
RETURN_FALSE;
}
- /* When deep is false Element nodes still require the attributes
- Following taken from libxml as xmlDocCopyNode doesn't do this */
- if (n->type == XML_ELEMENT_NODE && recursive == 0) {
- if (n->nsDef != NULL) {
- node->nsDef = xmlCopyNamespaceList(n->nsDef);
- }
- if (n->ns != NULL) {
- xmlNsPtr ns;
- ns = xmlSearchNs(n->doc, node, n->ns->prefix);
- if (ns == NULL) {
- ns = xmlSearchNs(n->doc, n, n->ns->prefix);
- if (ns != NULL) {
- xmlNodePtr root = node;
-
- while (root->parent != NULL) {
- root = root->parent;
- }
- node->ns = xmlNewNs(root, ns->href, ns->prefix);
- }
- } else {
- node->ns = ns;
- }
- }
- if (n->properties != NULL) {
- node->properties = xmlCopyPropList(node, n->properties);
- }
- }
-
if (node->type == XML_ATTRIBUTE_NODE && n->ns != NULL && node->ns == NULL) {
/* Let reconciliation deal with this. The lifetime of the namespace poses no problem
* because we're increasing the refcount of the document proxy at the return.
diff --git a/ext/dom/php_dom.c b/ext/dom/php_dom.c
index ce540ad4a3b0a..99d2f7fdd1d2a 100644
--- a/ext/dom/php_dom.c
+++ b/ext/dom/php_dom.c
@@ -1819,4 +1819,14 @@ static int dom_nodemap_has_dimension(zend_object *object, zval *member, int chec
return offset >= 0 && offset < php_dom_get_namednodemap_length(php_dom_obj_from_obj(object));
} /* }}} end dom_nodemap_has_dimension */
+xmlNodePtr dom_clone_node(xmlNodePtr node, xmlDocPtr doc, bool recursive) /* Copies node into doc through xmlDocCopyNode() and returns that call's result unchanged */
+{
+ /* See http://www.xmlsoft.org/html/libxml-tree.html#xmlDocCopyNode for meaning of values: per the libxml2 docs, 1 is a recursive copy and 2 copies properties and namespaces only */
+ int extended_recursive = recursive;
+ if (!recursive && node->type == XML_ELEMENT_NODE) { /* a shallow clone of an element still needs its attributes and namespaces */
+ extended_recursive = 2;
+ }
+ return xmlDocCopyNode(node, doc, extended_recursive);
+}
+
#endif /* HAVE_DOM */
diff --git a/ext/dom/php_dom.h b/ext/dom/php_dom.h
index b77036e83c294..df13dcef0d6cc 100644
--- a/ext/dom/php_dom.h
+++ b/ext/dom/php_dom.h
@@ -174,6 +174,8 @@ void php_dom_nodelist_get_item_into_zval(dom_nnodemap_object *objmap, zend_long
int php_dom_get_namednodemap_length(dom_object *obj);
int php_dom_get_nodelist_length(dom_object *obj);
+xmlNodePtr dom_clone_node(xmlNodePtr node, xmlDocPtr doc, bool recursive);
+
#define DOM_GET_INTERN(__id, __intern) { \
__intern = Z_DOMOBJ_P(__id); \
if (UNEXPECTED(__intern->ptr == NULL)) { \
From d0814070f74904b4a348e659c0b7338ec0d391ce Mon Sep 17 00:00:00 2001
From: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
Date: Sat, 26 Aug 2023 01:11:26 +0200
Subject: [PATCH 02/53] Update ext/libxml APIs so that non-libxml users can
hook into the error mechanism
---
ext/libxml/libxml.c | 43 +++++++++++++++++++++++++++++++----------
ext/libxml/php_libxml.h | 1 +
2 files changed, 34 insertions(+), 10 deletions(-)
diff --git a/ext/libxml/libxml.c b/ext/libxml/libxml.c
index 6ddbdff5fb800..baf5bfa5028e5 100644
--- a/ext/libxml/libxml.c
+++ b/ext/libxml/libxml.c
@@ -608,7 +608,7 @@ static void _php_libxml_free_error(void *ptr)
xmlResetError((xmlErrorPtr) ptr);
}
-static void _php_list_set_error_structure(xmlErrorPtr error, const char *msg)
+static void _php_list_set_error_structure(xmlErrorPtr error, const char *msg, int line, int column)
{
xmlError error_copy;
int ret;
@@ -621,6 +621,8 @@ static void _php_list_set_error_structure(xmlErrorPtr error, const char *msg)
} else {
error_copy.code = XML_ERR_INTERNAL_ERROR;
error_copy.level = XML_ERR_ERROR;
+ error_copy.line = line;
+ error_copy.int2 = column;
error_copy.message = (char*)xmlStrdup((const xmlChar*)msg);
ret = 0;
}
@@ -630,7 +632,7 @@ static void _php_list_set_error_structure(xmlErrorPtr error, const char *msg)
}
}
-static void php_libxml_ctx_error_level(int level, void *ctx, const char *msg)
+static void php_libxml_ctx_error_level(int level, void *ctx, const char *msg, int line)
{
xmlParserCtxtPtr parser;
@@ -638,9 +640,9 @@ static void php_libxml_ctx_error_level(int level, void *ctx, const char *msg)
if (parser != NULL && parser->input != NULL) {
if (parser->input->filename) {
- php_error_docref(NULL, level, "%s in %s, line: %d", msg, parser->input->filename, parser->input->line);
+ php_error_docref(NULL, level, "%s in %s, line: %d", msg, parser->input->filename, line);
} else {
- php_error_docref(NULL, level, "%s in Entity, line: %d", msg, parser->input->line);
+ php_error_docref(NULL, level, "%s in Entity, line: %d", msg, line);
}
} else {
php_error_docref(NULL, E_WARNING, "%s", msg);
@@ -650,13 +652,13 @@ static void php_libxml_ctx_error_level(int level, void *ctx, const char *msg)
void php_libxml_issue_error(int level, const char *msg)
{
if (LIBXML(error_list)) {
- _php_list_set_error_structure(NULL, msg);
+ _php_list_set_error_structure(NULL, msg, 0, 0);
} else {
php_error_docref(NULL, level, "%s", msg);
}
}
-static void php_libxml_internal_error_handler(int error_type, void *ctx, const char **msg, va_list ap)
+static void php_libxml_internal_error_handler_ex(int error_type, void *ctx, const char **msg, va_list ap, int line, int column)
{
char *buf;
int len, len_iter, output = 0;
@@ -676,15 +678,15 @@ static void php_libxml_internal_error_handler(int error_type, void *ctx, const c
if (output == 1) {
if (LIBXML(error_list)) {
- _php_list_set_error_structure(NULL, ZSTR_VAL(LIBXML(error_buffer).s));
+ _php_list_set_error_structure(NULL, ZSTR_VAL(LIBXML(error_buffer).s), line, column);
} else if (!EG(exception)) {
/* Don't throw additional notices/warnings if an exception has already been thrown. */
switch (error_type) {
case PHP_LIBXML_CTX_ERROR:
- php_libxml_ctx_error_level(E_WARNING, ctx, ZSTR_VAL(LIBXML(error_buffer).s));
+ php_libxml_ctx_error_level(E_WARNING, ctx, ZSTR_VAL(LIBXML(error_buffer).s), line);
break;
case PHP_LIBXML_CTX_WARNING:
- php_libxml_ctx_error_level(E_NOTICE, ctx, ZSTR_VAL(LIBXML(error_buffer).s));
+ php_libxml_ctx_error_level(E_NOTICE, ctx, ZSTR_VAL(LIBXML(error_buffer).s), line);
break;
default:
php_error_docref(NULL, E_WARNING, "%s", ZSTR_VAL(LIBXML(error_buffer).s));
@@ -694,6 +696,19 @@ static void php_libxml_internal_error_handler(int error_type, void *ctx, const c
}
}
+static void php_libxml_internal_error_handler(int error_type, void *ctx, const char **msg, va_list ap) /* Wrapper: derives line/column from the parser context, then forwards to the _ex variant */
+{
+ int line = 0; /* stays 0 when no usable parser input is available */
+ int column = 0; /* stays 0 when no usable parser input is available */
+ xmlParserCtxtPtr parser = (xmlParserCtxtPtr) ctx;
+ /* Context is not valid for PHP_LIBXML_ERROR, don't dereference it in that case */
+ if (error_type != PHP_LIBXML_ERROR && parser != NULL && parser->input != NULL) {
+ line = parser->input->line;
+ column = parser->input->col;
+ }
+ php_libxml_internal_error_handler_ex(error_type, ctx, msg, ap, line, column); /* ctx is passed through untouched */
+}
+
static xmlParserInputPtr _php_libxml_external_entity_loader(const char *URL,
const char *ID, xmlParserCtxtPtr context)
{
@@ -823,6 +838,14 @@ static xmlParserInputPtr _php_libxml_pre_ext_ent_loader(const char *URL,
}
}
+PHP_LIBXML_API void php_libxml_pretend_ctx_error_ex(int line, int column, const char *msg,...) /* Raises a PHP_LIBXML_CTX_ERROR at a caller-supplied line/column, without a parser context */
+{
+ va_list args;
+ va_start(args, msg);
+ php_libxml_internal_error_handler_ex(PHP_LIBXML_CTX_ERROR, NULL, &msg, args, line, column); /* ctx is NULL: the position comes solely from the line/column arguments */
+ va_end(args);
+}
+
PHP_LIBXML_API void php_libxml_ctx_error(void *ctx, const char *msg, ...)
{
va_list args;
@@ -841,7 +864,7 @@ PHP_LIBXML_API void php_libxml_ctx_warning(void *ctx, const char *msg, ...)
static void php_libxml_structured_error_handler(void *userData, xmlErrorPtr error)
{
- _php_list_set_error_structure(error, NULL);
+ _php_list_set_error_structure(error, NULL, 0, 0);
return;
}
diff --git a/ext/libxml/php_libxml.h b/ext/libxml/php_libxml.h
index 7ce7def92ae5f..7050dd7ab842e 100644
--- a/ext/libxml/php_libxml.h
+++ b/ext/libxml/php_libxml.h
@@ -131,6 +131,7 @@ PHP_LIBXML_API void php_libxml_node_free_resource(xmlNodePtr node);
PHP_LIBXML_API void php_libxml_node_decrement_resource(php_libxml_node_object *object);
PHP_LIBXML_API void php_libxml_error_handler(void *ctx, const char *msg, ...);
PHP_LIBXML_API void php_libxml_ctx_warning(void *ctx, const char *msg, ...);
+PHP_LIBXML_API void php_libxml_pretend_ctx_error_ex(int line, int column, const char *msg,...);
PHP_LIBXML_API void php_libxml_ctx_error(void *ctx, const char *msg, ...);
PHP_LIBXML_API int php_libxml_xmlCheckUTF8(const unsigned char *s);
PHP_LIBXML_API void php_libxml_switch_context(zval *context, zval *oldcontext);
From c031fff4aad9a8f089097fa3fe7b8fd7abb4eba8 Mon Sep 17 00:00:00 2001
From: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
Date: Sat, 26 Aug 2023 01:11:52 +0200
Subject: [PATCH 03/53] Add is_html5_class field to document data in libxml
---
ext/libxml/libxml.c | 1 +
ext/libxml/php_libxml.h | 3 ++-
2 files changed, 3 insertions(+), 1 deletion(-)
diff --git a/ext/libxml/libxml.c b/ext/libxml/libxml.c
index baf5bfa5028e5..b55f57e2752d9 100644
--- a/ext/libxml/libxml.c
+++ b/ext/libxml/libxml.c
@@ -1356,6 +1356,7 @@ PHP_LIBXML_API int php_libxml_increment_doc_ref(php_libxml_node_object *object,
object->document->refcount = ret_refcount;
object->document->doc_props = NULL;
object->document->cache_tag.modification_nr = 1; /* iterators start at 0, such that they will start in an uninitialised state */
+ object->document->is_html5_class = false;
}
return ret_refcount;
diff --git a/ext/libxml/php_libxml.h b/ext/libxml/php_libxml.h
index 7050dd7ab842e..7ffb93274ba7d 100644
--- a/ext/libxml/php_libxml.h
+++ b/ext/libxml/php_libxml.h
@@ -63,9 +63,10 @@ typedef struct {
typedef struct _php_libxml_ref_obj {
void *ptr;
- int refcount;
libxml_doc_props *doc_props;
php_libxml_cache_tag cache_tag;
+ int refcount;
+ bool is_html5_class;
} php_libxml_ref_obj;
typedef struct _php_libxml_node_ptr {
From 32747ec49d0e301d88be89dd006600daded23f3b Mon Sep 17 00:00:00 2001
From: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
Date: Sat, 26 Aug 2023 01:12:23 +0200
Subject: [PATCH 04/53] Implement HTML5Document
---
ext/dom/config.m4 | 19 +-
ext/dom/config.w32 | 18 +-
ext/dom/document.c | 33 +-
ext/dom/dom_ce.h | 1 +
ext/dom/dom_properties.h | 3 +
ext/dom/html5_document.c | 1024 +++++++++++
ext/dom/html5_parser.c | 262 +++
ext/dom/html5_parser.h | 57 +
ext/dom/html5_serializer.c | 351 ++++
ext/dom/html5_serializer.h | 31 +
ext/dom/namespace_compat.c | 54 +
ext/dom/namespace_compat.h | 39 +
ext/dom/node.c | 2 +-
ext/dom/php_dom.c | 50 +-
ext/dom/php_dom.h | 12 +-
ext/dom/php_dom.stub.php | 1575 +++++++++--------
ext/dom/php_dom_arginfo.h | 77 +-
.../HTML5/encoding/Document_GB18030.phpt | 37 +
.../HTML5/encoding/Document_Shift_JIS.phpt | 41 +
.../HTML5/encoding/Document_UTF16BE_BOM.phpt | 39 +
.../HTML5/encoding/Document_UTF16LE_BOM.phpt | 39 +
.../HTML5/encoding/Document_UTF8_BOM.phpt | 39 +
.../HTML5/encoding/Document_Windows1251.phpt | 41 +
.../Document_encoding_edge_case_01.phpt | 16 +
.../Document_encoding_edge_case_02.phpt | 28 +
.../Document_encoding_edge_case_03.phpt | 18 +
.../Document_encoding_edge_case_04.phpt | 16 +
.../Document_encoding_edge_case_05.phpt | 22 +
.../Document_encoding_edge_case_06.phpt | 16 +
.../Document_encoding_edge_case_07.phpt | 22 +
.../Document_encoding_field_test.phpt | 36 +
.../Document_encoding_unicode_error.phpt | 27 +
.../encoding/Document_fallback_encoding.phpt | 24 +
.../Document_load_different_encoding.phpt | 19 +
.../HTML5/encoding/fallback_encoding.html | 6 +
ext/dom/tests/HTML5/encoding/gb18030.html | 7 +
ext/dom/tests/HTML5/encoding/shift_jis.html | 7 +
ext/dom/tests/HTML5/encoding/utf16be_bom.html | Bin 0 -> 212 bytes
ext/dom/tests/HTML5/encoding/utf16le_bom.html | Bin 0 -> 212 bytes
.../tests/HTML5/encoding/utf16le_error.html | Bin 0 -> 268 bytes
ext/dom/tests/HTML5/encoding/utf8_bom.html | 7 +
ext/dom/tests/HTML5/encoding/windows1251.html | 7 +
.../Document_adopt_DOMDocument.phpt | 30 +
.../HTML5/interactions/Document_clone.phpt | 31 +
.../Document_node_ownerDocument_for_XML.phpt | 98 +
.../Document_registerNodeClass_01.phpt | 28 +
.../Document_registerNodeClass_02.phpt | 17 +
.../Document_registerNodeClass_03.phpt | 26 +
...should_retain_properties_and_owner_01.phpt | 107 ++
...should_retain_properties_and_owner_02.phpt | 106 ++
...dHTMLFile_DOM_HTML_NO_DEFAULT_NS copy.phpt | 39 +
.../Document_loadHTMLFile_empty_path.phpt | 19 +
...ment_loadHTMLFile_local_existing_file.phpt | 24 +
...oadHTMLFile_local_file_does_not_exist.phpt | 15 +
...oadHTMLFile_nul_terminator_cases_path.phpt | 21 +
...cument_loadHTMLFile_parser_warning_01.phpt | 20 +
...cument_loadHTMLFile_parser_warning_02.phpt | 21 +
...cument_loadHTMLFile_parser_warning_03.phpt | 17 +
...dHTMLFile_with_failing_stream_wrapper.phpt | 51 +
...dHTMLFile_with_working_stream_wrapper.phpt | 62 +
...ument_loadHTML_DOM_HTML_NO_DEFAULT_NS.phpt | 39 +
.../Document_loadHTML_LIBXML_COMPACT.phpt | 41 +
...cument_loadHTML_LIBXML_HTML_NOIMPLIED.phpt | 93 +
...dHTML_LIBXML_HTML_NOIMPLIED_namespace.phpt | 17 +
.../HTML5/parser/Document_loadHTML_empty.phpt | 15 +
.../parser/Document_loadHTML_line_column.phpt | 59 +
.../Document_loadHTML_normal_no_error.phpt | 41 +
.../parser/Document_loadHTML_old_dtd.phpt | 40 +
.../Document_loadHTML_parser_warning_01.phpt | 24 +
.../Document_loadHTML_parser_warning_02.phpt | 33 +
.../Document_loadHTML_parser_warning_03.phpt | 18 +
...oadHTML_parser_warning_internal_error.phpt | 31 +
.../Document_loadHTML_without_body.phpt | 16 +
.../HTML5/parser/Document_load_options.phpt | 109 ++
ext/dom/tests/HTML5/parser/paragraph.html | 1 +
.../tests/HTML5/parser/parser_warning_01.html | 7 +
.../tests/HTML5/parser/parser_warning_02.html | Bin 0 -> 191 bytes
.../tests/HTML5/parser/parser_warning_03.html | 6 +
.../HTML5/parser/predefined_namespaces.phpt | 101 ++
.../serializer/Document_escape_attribute.phpt | 16 +
.../serializer/Document_escape_nbsp.phpt | 14 +
.../Document_serialize_attribute_ns.phpt | 22 +
.../serializer/Document_serialize_cdata.phpt | 14 +
.../Document_serialize_comment.phpt | 14 +
.../Document_serialize_doctype.phpt | 40 +
.../Document_serialize_element_ns.phpt | 29 +
.../Document_serialize_failing_stream.phpt | 49 +
.../Document_serialize_fragment.phpt | 18 +
.../Document_serialize_full_document.phpt | 52 +
.../Document_serialize_ns_imported_01.phpt | 34 +
.../Document_serialize_ns_imported_02.phpt | 33 +
.../Document_serialize_ns_imported_03.phpt | 33 +
.../Document_serialize_ns_imported_04.phpt | 33 +
.../Document_serialize_ns_imported_05.phpt | 33 +
.../Document_serialize_ns_imported_06.phpt | 33 +
...ment_serialize_processing_instruction.phpt | 15 +
.../Document_serialize_roots_test_empty.phpt | 30 +
.../Document_serialize_text_01.phpt | 16 +
.../Document_serialize_text_02.phpt | 27 +
.../Document_serialize_text_03.phpt | 27 +
.../Document_serialize_void_elements.phpt | 100 ++
ext/dom/xpath.c | 1 -
102 files changed, 5422 insertions(+), 806 deletions(-)
create mode 100644 ext/dom/html5_document.c
create mode 100644 ext/dom/html5_parser.c
create mode 100644 ext/dom/html5_parser.h
create mode 100644 ext/dom/html5_serializer.c
create mode 100644 ext/dom/html5_serializer.h
create mode 100644 ext/dom/namespace_compat.c
create mode 100644 ext/dom/namespace_compat.h
create mode 100644 ext/dom/tests/HTML5/encoding/Document_GB18030.phpt
create mode 100644 ext/dom/tests/HTML5/encoding/Document_Shift_JIS.phpt
create mode 100644 ext/dom/tests/HTML5/encoding/Document_UTF16BE_BOM.phpt
create mode 100644 ext/dom/tests/HTML5/encoding/Document_UTF16LE_BOM.phpt
create mode 100644 ext/dom/tests/HTML5/encoding/Document_UTF8_BOM.phpt
create mode 100644 ext/dom/tests/HTML5/encoding/Document_Windows1251.phpt
create mode 100644 ext/dom/tests/HTML5/encoding/Document_encoding_edge_case_01.phpt
create mode 100644 ext/dom/tests/HTML5/encoding/Document_encoding_edge_case_02.phpt
create mode 100644 ext/dom/tests/HTML5/encoding/Document_encoding_edge_case_03.phpt
create mode 100644 ext/dom/tests/HTML5/encoding/Document_encoding_edge_case_04.phpt
create mode 100644 ext/dom/tests/HTML5/encoding/Document_encoding_edge_case_05.phpt
create mode 100644 ext/dom/tests/HTML5/encoding/Document_encoding_edge_case_06.phpt
create mode 100644 ext/dom/tests/HTML5/encoding/Document_encoding_edge_case_07.phpt
create mode 100644 ext/dom/tests/HTML5/encoding/Document_encoding_field_test.phpt
create mode 100644 ext/dom/tests/HTML5/encoding/Document_encoding_unicode_error.phpt
create mode 100644 ext/dom/tests/HTML5/encoding/Document_fallback_encoding.phpt
create mode 100644 ext/dom/tests/HTML5/encoding/Document_load_different_encoding.phpt
create mode 100644 ext/dom/tests/HTML5/encoding/fallback_encoding.html
create mode 100644 ext/dom/tests/HTML5/encoding/gb18030.html
create mode 100644 ext/dom/tests/HTML5/encoding/shift_jis.html
create mode 100644 ext/dom/tests/HTML5/encoding/utf16be_bom.html
create mode 100644 ext/dom/tests/HTML5/encoding/utf16le_bom.html
create mode 100644 ext/dom/tests/HTML5/encoding/utf16le_error.html
create mode 100644 ext/dom/tests/HTML5/encoding/utf8_bom.html
create mode 100644 ext/dom/tests/HTML5/encoding/windows1251.html
create mode 100644 ext/dom/tests/HTML5/interactions/Document_adopt_DOMDocument.phpt
create mode 100644 ext/dom/tests/HTML5/interactions/Document_clone.phpt
create mode 100644 ext/dom/tests/HTML5/interactions/Document_node_ownerDocument_for_XML.phpt
create mode 100644 ext/dom/tests/HTML5/interactions/Document_registerNodeClass_01.phpt
create mode 100644 ext/dom/tests/HTML5/interactions/Document_registerNodeClass_02.phpt
create mode 100644 ext/dom/tests/HTML5/interactions/Document_registerNodeClass_03.phpt
create mode 100644 ext/dom/tests/HTML5/interactions/Document_should_retain_properties_and_owner_01.phpt
create mode 100644 ext/dom/tests/HTML5/interactions/Document_should_retain_properties_and_owner_02.phpt
create mode 100644 ext/dom/tests/HTML5/parser/Document_loadHTMLFile_DOM_HTML_NO_DEFAULT_NS copy.phpt
create mode 100644 ext/dom/tests/HTML5/parser/Document_loadHTMLFile_empty_path.phpt
create mode 100644 ext/dom/tests/HTML5/parser/Document_loadHTMLFile_local_existing_file.phpt
create mode 100644 ext/dom/tests/HTML5/parser/Document_loadHTMLFile_local_file_does_not_exist.phpt
create mode 100644 ext/dom/tests/HTML5/parser/Document_loadHTMLFile_nul_terminator_cases_path.phpt
create mode 100644 ext/dom/tests/HTML5/parser/Document_loadHTMLFile_parser_warning_01.phpt
create mode 100644 ext/dom/tests/HTML5/parser/Document_loadHTMLFile_parser_warning_02.phpt
create mode 100644 ext/dom/tests/HTML5/parser/Document_loadHTMLFile_parser_warning_03.phpt
create mode 100644 ext/dom/tests/HTML5/parser/Document_loadHTMLFile_with_failing_stream_wrapper.phpt
create mode 100644 ext/dom/tests/HTML5/parser/Document_loadHTMLFile_with_working_stream_wrapper.phpt
create mode 100644 ext/dom/tests/HTML5/parser/Document_loadHTML_DOM_HTML_NO_DEFAULT_NS.phpt
create mode 100644 ext/dom/tests/HTML5/parser/Document_loadHTML_LIBXML_COMPACT.phpt
create mode 100644 ext/dom/tests/HTML5/parser/Document_loadHTML_LIBXML_HTML_NOIMPLIED.phpt
create mode 100644 ext/dom/tests/HTML5/parser/Document_loadHTML_LIBXML_HTML_NOIMPLIED_namespace.phpt
create mode 100644 ext/dom/tests/HTML5/parser/Document_loadHTML_empty.phpt
create mode 100644 ext/dom/tests/HTML5/parser/Document_loadHTML_line_column.phpt
create mode 100644 ext/dom/tests/HTML5/parser/Document_loadHTML_normal_no_error.phpt
create mode 100644 ext/dom/tests/HTML5/parser/Document_loadHTML_old_dtd.phpt
create mode 100644 ext/dom/tests/HTML5/parser/Document_loadHTML_parser_warning_01.phpt
create mode 100644 ext/dom/tests/HTML5/parser/Document_loadHTML_parser_warning_02.phpt
create mode 100644 ext/dom/tests/HTML5/parser/Document_loadHTML_parser_warning_03.phpt
create mode 100644 ext/dom/tests/HTML5/parser/Document_loadHTML_parser_warning_internal_error.phpt
create mode 100644 ext/dom/tests/HTML5/parser/Document_loadHTML_without_body.phpt
create mode 100644 ext/dom/tests/HTML5/parser/Document_load_options.phpt
create mode 100644 ext/dom/tests/HTML5/parser/paragraph.html
create mode 100644 ext/dom/tests/HTML5/parser/parser_warning_01.html
create mode 100644 ext/dom/tests/HTML5/parser/parser_warning_02.html
create mode 100644 ext/dom/tests/HTML5/parser/parser_warning_03.html
create mode 100644 ext/dom/tests/HTML5/parser/predefined_namespaces.phpt
create mode 100644 ext/dom/tests/HTML5/serializer/Document_escape_attribute.phpt
create mode 100644 ext/dom/tests/HTML5/serializer/Document_escape_nbsp.phpt
create mode 100644 ext/dom/tests/HTML5/serializer/Document_serialize_attribute_ns.phpt
create mode 100644 ext/dom/tests/HTML5/serializer/Document_serialize_cdata.phpt
create mode 100644 ext/dom/tests/HTML5/serializer/Document_serialize_comment.phpt
create mode 100644 ext/dom/tests/HTML5/serializer/Document_serialize_doctype.phpt
create mode 100644 ext/dom/tests/HTML5/serializer/Document_serialize_element_ns.phpt
create mode 100644 ext/dom/tests/HTML5/serializer/Document_serialize_failing_stream.phpt
create mode 100644 ext/dom/tests/HTML5/serializer/Document_serialize_fragment.phpt
create mode 100644 ext/dom/tests/HTML5/serializer/Document_serialize_full_document.phpt
create mode 100644 ext/dom/tests/HTML5/serializer/Document_serialize_ns_imported_01.phpt
create mode 100644 ext/dom/tests/HTML5/serializer/Document_serialize_ns_imported_02.phpt
create mode 100644 ext/dom/tests/HTML5/serializer/Document_serialize_ns_imported_03.phpt
create mode 100644 ext/dom/tests/HTML5/serializer/Document_serialize_ns_imported_04.phpt
create mode 100644 ext/dom/tests/HTML5/serializer/Document_serialize_ns_imported_05.phpt
create mode 100644 ext/dom/tests/HTML5/serializer/Document_serialize_ns_imported_06.phpt
create mode 100644 ext/dom/tests/HTML5/serializer/Document_serialize_processing_instruction.phpt
create mode 100644 ext/dom/tests/HTML5/serializer/Document_serialize_roots_test_empty.phpt
create mode 100644 ext/dom/tests/HTML5/serializer/Document_serialize_text_01.phpt
create mode 100644 ext/dom/tests/HTML5/serializer/Document_serialize_text_02.phpt
create mode 100644 ext/dom/tests/HTML5/serializer/Document_serialize_text_03.phpt
create mode 100644 ext/dom/tests/HTML5/serializer/Document_serialize_void_elements.phpt
diff --git a/ext/dom/config.m4 b/ext/dom/config.m4
index 6a83d10c8e245..c43bb35f100b5 100644
--- a/ext/dom/config.m4
+++ b/ext/dom/config.m4
@@ -12,7 +12,21 @@ if test "$PHP_DOM" != "no"; then
PHP_SETUP_LIBXML(DOM_SHARED_LIBADD, [
AC_DEFINE(HAVE_DOM,1,[ ])
+ PHP_LEXBOR_CFLAGS="-I@ext_srcdir@/lexbor -DLEXBOR_STATIC"
+ LEXBOR_DIR="lexbor/lexbor"
+ LEXBOR_SOURCES="$LEXBOR_DIR/ports/posix/lexbor/core/memory.c \
+ $LEXBOR_DIR/core/array_obj.c $LEXBOR_DIR/core/array.c $LEXBOR_DIR/core/avl.c $LEXBOR_DIR/core/bst.c $LEXBOR_DIR/core/diyfp.c $LEXBOR_DIR/core/conv.c $LEXBOR_DIR/core/dobject.c $LEXBOR_DIR/core/dtoa.c $LEXBOR_DIR/core/hash.c $LEXBOR_DIR/core/mem.c $LEXBOR_DIR/core/mraw.c $LEXBOR_DIR/core/print.c $LEXBOR_DIR/core/serialize.c $LEXBOR_DIR/core/shs.c $LEXBOR_DIR/core/str.c $LEXBOR_DIR/core/strtod.c \
+ $LEXBOR_DIR/dom/interface.c $LEXBOR_DIR/dom/interfaces/attr.c $LEXBOR_DIR/dom/interfaces/cdata_section.c $LEXBOR_DIR/dom/interfaces/character_data.c $LEXBOR_DIR/dom/interfaces/comment.c $LEXBOR_DIR/dom/interfaces/document.c $LEXBOR_DIR/dom/interfaces/document_fragment.c $LEXBOR_DIR/dom/interfaces/document_type.c $LEXBOR_DIR/dom/interfaces/element.c $LEXBOR_DIR/dom/interfaces/node.c $LEXBOR_DIR/dom/interfaces/processing_instruction.c $LEXBOR_DIR/dom/interfaces/shadow_root.c $LEXBOR_DIR/dom/interfaces/text.c \
+ $LEXBOR_DIR/html/tokenizer/error.c $LEXBOR_DIR/html/tokenizer/state_comment.c $LEXBOR_DIR/html/tokenizer/state_doctype.c $LEXBOR_DIR/html/tokenizer/state_rawtext.c $LEXBOR_DIR/html/tokenizer/state_rcdata.c $LEXBOR_DIR/html/tokenizer/state_script.c $LEXBOR_DIR/html/tokenizer/state.c \
+ $LEXBOR_DIR/html/tree/active_formatting.c $LEXBOR_DIR/html/tree/error.c $LEXBOR_DIR/html/tree/insertion_mode/after_after_body.c $LEXBOR_DIR/html/tree/insertion_mode/after_after_frameset.c $LEXBOR_DIR/html/tree/insertion_mode/after_body.c $LEXBOR_DIR/html/tree/insertion_mode/after_frameset.c $LEXBOR_DIR/html/tree/insertion_mode/after_head.c $LEXBOR_DIR/html/tree/insertion_mode/before_head.c $LEXBOR_DIR/html/tree/insertion_mode/before_html.c $LEXBOR_DIR/html/tree/insertion_mode/foreign_content.c $LEXBOR_DIR/html/tree/insertion_mode/in_body.c $LEXBOR_DIR/html/tree/insertion_mode/in_caption.c $LEXBOR_DIR/html/tree/insertion_mode/in_cell.c $LEXBOR_DIR/html/tree/insertion_mode/in_column_group.c $LEXBOR_DIR/html/tree/insertion_mode/in_frameset.c $LEXBOR_DIR/html/tree/insertion_mode/in_head.c $LEXBOR_DIR/html/tree/insertion_mode/in_head_noscript.c $LEXBOR_DIR/html/tree/insertion_mode/initial.c $LEXBOR_DIR/html/tree/insertion_mode/in_row.c $LEXBOR_DIR/html/tree/insertion_mode/in_select.c $LEXBOR_DIR/html/tree/insertion_mode/in_select_in_table.c $LEXBOR_DIR/html/tree/insertion_mode/in_table_body.c $LEXBOR_DIR/html/tree/insertion_mode/in_table.c $LEXBOR_DIR/html/tree/insertion_mode/in_table_text.c $LEXBOR_DIR/html/tree/insertion_mode/in_template.c $LEXBOR_DIR/html/tree/insertion_mode/text.c $LEXBOR_DIR/html/tree/open_elements.c \
+ $LEXBOR_DIR/encoding/big5.c $LEXBOR_DIR/encoding/decode.c $LEXBOR_DIR/encoding/encode.c $LEXBOR_DIR/encoding/encoding.c $LEXBOR_DIR/encoding/euc_kr.c $LEXBOR_DIR/encoding/gb18030.c $LEXBOR_DIR/encoding/iso_2022_jp_katakana.c $LEXBOR_DIR/encoding/jis0208.c $LEXBOR_DIR/encoding/jis0212.c $LEXBOR_DIR/encoding/range.c $LEXBOR_DIR/encoding/res.c $LEXBOR_DIR/encoding/single.c \
+ $LEXBOR_DIR/html/encoding.c $LEXBOR_DIR/html/interface.c $LEXBOR_DIR/html/parser.c $LEXBOR_DIR/html/token.c $LEXBOR_DIR/html/token_attr.c $LEXBOR_DIR/html/tokenizer.c $LEXBOR_DIR/html/tree.c \
+ $LEXBOR_DIR/html/interfaces/anchor_element.c $LEXBOR_DIR/html/interfaces/area_element.c $LEXBOR_DIR/html/interfaces/audio_element.c $LEXBOR_DIR/html/interfaces/base_element.c $LEXBOR_DIR/html/interfaces/body_element.c $LEXBOR_DIR/html/interfaces/br_element.c $LEXBOR_DIR/html/interfaces/button_element.c $LEXBOR_DIR/html/interfaces/canvas_element.c $LEXBOR_DIR/html/interfaces/data_element.c $LEXBOR_DIR/html/interfaces/data_list_element.c $LEXBOR_DIR/html/interfaces/details_element.c $LEXBOR_DIR/html/interfaces/dialog_element.c $LEXBOR_DIR/html/interfaces/directory_element.c $LEXBOR_DIR/html/interfaces/div_element.c $LEXBOR_DIR/html/interfaces/d_list_element.c $LEXBOR_DIR/html/interfaces/document.c $LEXBOR_DIR/html/interfaces/element.c $LEXBOR_DIR/html/interfaces/embed_element.c $LEXBOR_DIR/html/interfaces/field_set_element.c $LEXBOR_DIR/html/interfaces/font_element.c $LEXBOR_DIR/html/interfaces/form_element.c $LEXBOR_DIR/html/interfaces/frame_element.c $LEXBOR_DIR/html/interfaces/frame_set_element.c $LEXBOR_DIR/html/interfaces/head_element.c $LEXBOR_DIR/html/interfaces/heading_element.c $LEXBOR_DIR/html/interfaces/hr_element.c $LEXBOR_DIR/html/interfaces/html_element.c $LEXBOR_DIR/html/interfaces/iframe_element.c $LEXBOR_DIR/html/interfaces/image_element.c $LEXBOR_DIR/html/interfaces/input_element.c $LEXBOR_DIR/html/interfaces/label_element.c $LEXBOR_DIR/html/interfaces/legend_element.c $LEXBOR_DIR/html/interfaces/li_element.c $LEXBOR_DIR/html/interfaces/link_element.c $LEXBOR_DIR/html/interfaces/map_element.c $LEXBOR_DIR/html/interfaces/marquee_element.c $LEXBOR_DIR/html/interfaces/media_element.c $LEXBOR_DIR/html/interfaces/menu_element.c $LEXBOR_DIR/html/interfaces/meta_element.c $LEXBOR_DIR/html/interfaces/meter_element.c $LEXBOR_DIR/html/interfaces/mod_element.c $LEXBOR_DIR/html/interfaces/object_element.c $LEXBOR_DIR/html/interfaces/o_list_element.c $LEXBOR_DIR/html/interfaces/opt_group_element.c $LEXBOR_DIR/html/interfaces/option_element.c 
$LEXBOR_DIR/html/interfaces/output_element.c $LEXBOR_DIR/html/interfaces/paragraph_element.c $LEXBOR_DIR/html/interfaces/param_element.c $LEXBOR_DIR/html/interfaces/picture_element.c $LEXBOR_DIR/html/interfaces/pre_element.c $LEXBOR_DIR/html/interfaces/progress_element.c $LEXBOR_DIR/html/interfaces/quote_element.c $LEXBOR_DIR/html/interfaces/script_element.c $LEXBOR_DIR/html/interfaces/select_element.c $LEXBOR_DIR/html/interfaces/slot_element.c $LEXBOR_DIR/html/interfaces/source_element.c $LEXBOR_DIR/html/interfaces/span_element.c $LEXBOR_DIR/html/interfaces/style_element.c $LEXBOR_DIR/html/interfaces/table_caption_element.c $LEXBOR_DIR/html/interfaces/table_cell_element.c $LEXBOR_DIR/html/interfaces/table_col_element.c $LEXBOR_DIR/html/interfaces/table_element.c $LEXBOR_DIR/html/interfaces/table_row_element.c $LEXBOR_DIR/html/interfaces/table_section_element.c $LEXBOR_DIR/html/interfaces/template_element.c $LEXBOR_DIR/html/interfaces/text_area_element.c $LEXBOR_DIR/html/interfaces/time_element.c $LEXBOR_DIR/html/interfaces/title_element.c $LEXBOR_DIR/html/interfaces/track_element.c $LEXBOR_DIR/html/interfaces/u_list_element.c $LEXBOR_DIR/html/interfaces/unknown_element.c $LEXBOR_DIR/html/interfaces/video_element.c $LEXBOR_DIR/html/interfaces/window.c \
+ $LEXBOR_DIR/selectors/selectors.c \
+ $LEXBOR_DIR/ns/ns.c \
+ $LEXBOR_DIR/tag/tag.c"
PHP_NEW_EXTENSION(dom, [php_dom.c attr.c document.c \
+ html5_document.c html5_serializer.c html5_parser.c namespace_compat.c \
domexception.c parentnode.c \
processinginstruction.c cdatasection.c \
documentfragment.c domimplementation.c \
@@ -21,8 +35,9 @@ if test "$PHP_DOM" != "no"; then
nodelist.c text.c comment.c \
entityreference.c \
notation.c xpath.c dom_iterators.c \
- namednodemap.c],
- $ext_shared)
+ namednodemap.c \
+ $LEXBOR_SOURCES],
+ $ext_shared,,$PHP_LEXBOR_CFLAGS)
PHP_SUBST(DOM_SHARED_LIBADD)
PHP_INSTALL_HEADERS([ext/dom/xml_common.h])
PHP_ADD_EXTENSION_DEP(dom, libxml)
diff --git a/ext/dom/config.w32 b/ext/dom/config.w32
index 7795445019e1a..b663b64c69a5f 100644
--- a/ext/dom/config.w32
+++ b/ext/dom/config.w32
@@ -8,13 +8,29 @@ if (PHP_DOM == "yes") {
CHECK_HEADER_ADD_INCLUDE("libxml/parser.h", "CFLAGS_DOM", PHP_PHP_BUILD + "\\include\\libxml2")
) {
EXTENSION("dom", "php_dom.c attr.c document.c \
+ html5_document.c html5_serializer.c html5_parser.c namespace_compat.c \
domexception.c parentnode.c processinginstruction.c \
cdatasection.c documentfragment.c domimplementation.c element.c \
node.c characterdata.c documenttype.c \
entity.c nodelist.c text.c comment.c \
entityreference.c \
notation.c xpath.c dom_iterators.c \
- namednodemap.c");
+ namednodemap.c", null, "-Iext/dom/lexbor");
+
+ ADD_SOURCES("ext/dom/lexbor/lexbor/ports/windows_nt/lexbor/core", "memory.c", "dom");
+ ADD_SOURCES("ext/dom/lexbor/lexbor/core", "array_obj.c array.c avl.c bst.c diyfp.c conv.c dobject.c dtoa.c hash.c mem.c mraw.c print.c serialize.c shs.c str.c strtod.c", "dom");
+ ADD_SOURCES("ext/dom/lexbor/lexbor/dom", "interface.c", "dom");
+ ADD_SOURCES("ext/dom/lexbor/lexbor/dom/interfaces", "attr.c cdata_section.c character_data.c comment.c document.c document_fragment.c document_type.c element.c node.c processing_instruction.c shadow_root.c text.c", "dom");
+ ADD_SOURCES("ext/dom/lexbor/lexbor/html/tokenizer", "error.c state_comment.c state_doctype.c state_rawtext.c state_rcdata.c state_script.c state.c", "dom");
+ ADD_SOURCES("ext/dom/lexbor/lexbor/html/tree", "active_formatting.c open_elements.c error.c", "dom");
+ ADD_SOURCES("ext/dom/lexbor/lexbor/html/tree/insertion_mode", "after_after_body.c after_after_frameset.c after_body.c after_frameset.c after_head.c before_head.c before_html.c foreign_content.c in_body.c in_caption.c in_cell.c in_column_group.c in_frameset.c in_head.c in_head_noscript.c initial.c in_row.c in_select.c in_select_in_table.c in_table_body.c in_table.c in_table_text.c in_template.c text.c", "dom");
+ ADD_SOURCES("ext/dom/lexbor/lexbor/html", "encoding.c interface.c parser.c token.c token_attr.c tokenizer.c tree.c", "dom");
+ ADD_SOURCES("ext/dom/lexbor/lexbor/encoding", "big5.c decode.c encode.c encoding.c euc_kr.c gb18030.c iso_2022_jp_katakana.c jis0208.c jis0212.c range.c res.c single.c", "dom");
+ ADD_SOURCES("ext/dom/lexbor/lexbor/html/interfaces", "anchor_element.c area_element.c audio_element.c base_element.c body_element.c br_element.c button_element.c canvas_element.c data_element.c data_list_element.c details_element.c dialog_element.c directory_element.c div_element.c d_list_element.c document.c element.c embed_element.c field_set_element.c font_element.c form_element.c frame_element.c frame_set_element.c head_element.c heading_element.c hr_element.c html_element.c iframe_element.c image_element.c input_element.c label_element.c legend_element.c li_element.c link_element.c map_element.c marquee_element.c media_element.c menu_element.c meta_element.c meter_element.c mod_element.c object_element.c o_list_element.c opt_group_element.c option_element.c output_element.c paragraph_element.c param_element.c picture_element.c pre_element.c progress_element.c quote_element.c script_element.c select_element.c slot_element.c source_element.c span_element.c style_element.c table_caption_element.c table_cell_element.c table_col_element.c table_element.c table_row_element.c table_section_element.c template_element.c text_area_element.c time_element.c title_element.c track_element.c u_list_element.c unknown_element.c video_element.c window.c", "dom");
+ ADD_SOURCES("ext/dom/lexbor/lexbor/selectors", "selectors.c", "dom");
+ ADD_SOURCES("ext/dom/lexbor/lexbor/ns", "ns.c", "dom");
+ ADD_SOURCES("ext/dom/lexbor/lexbor/tag", "tag.c", "dom");
+ ADD_FLAG("CFLAGS_DOM", "/D LEXBOR_STATIC ");
AC_DEFINE("HAVE_DOM", 1, "DOM support");
diff --git a/ext/dom/document.c b/ext/dom/document.c
index 2aebbccca719d..cbaa29fb775c4 100644
--- a/ext/dom/document.c
+++ b/ext/dom/document.c
@@ -35,9 +35,6 @@ struct _idsIterator {
xmlNode *element;
};
-#define DOM_LOAD_STRING 0
-#define DOM_LOAD_FILE 1
-
/*
* class DOMDocument extends DOMNode
*
@@ -794,7 +791,7 @@ PHP_METHOD(DOMDocument, importNode)
if (nodep->doc == docp) {
retnodep = nodep;
} else {
- retnodep = dom_clone_node(nodep, docp, recursive);
+ retnodep = dom_clone_node(nodep, docp, intern, recursive);
if (!retnodep) {
RETURN_FALSE;
}
@@ -1101,8 +1098,7 @@ PHP_METHOD(DOMDocument, normalizeDocument)
}
/* }}} end dom_document_normalize_document */
-/* {{{ */
-PHP_METHOD(DOMDocument, __construct)
+void php_dom_document_constructor(INTERNAL_FUNCTION_PARAMETERS)
{
xmlDoc *docp = NULL, *olddoc;
dom_object *intern;
@@ -1141,6 +1137,12 @@ PHP_METHOD(DOMDocument, __construct)
}
php_libxml_increment_node_ptr((php_libxml_node_object *)intern, (xmlNodePtr)docp, (void *)intern);
}
+
+/* {{{ */
+PHP_METHOD(DOMDocument, __construct)
+{
+ php_dom_document_constructor(INTERNAL_FUNCTION_PARAM_PASSTHRU);
+}
/* }}} end DOMDocument::__construct */
char *_dom_get_valid_file_path(char *source, char *resolved_path, int resolved_path_len ) /* {{{ */
@@ -1313,7 +1315,7 @@ static xmlDocPtr dom_document_parser(zval *id, int mode, char *source, size_t so
}
/* }}} */
-static void dom_finish_loading_document(zval *this, zval *return_value, xmlDocPtr newdoc)
+void php_dom_finish_loading_document(zval *this, zval *return_value, xmlDocPtr newdoc)
{
if (!newdoc)
RETURN_FALSE;
@@ -1321,6 +1323,7 @@ static void dom_finish_loading_document(zval *this, zval *return_value, xmlDocPt
dom_object *intern = Z_DOMOBJ_P(this);
size_t old_modification_nr = 0;
if (intern != NULL) {
+ bool is_html5_class = intern->document->is_html5_class;
xmlDocPtr docp = (xmlDocPtr) dom_object_get_node(intern);
dom_doc_propsptr doc_prop = NULL;
if (docp != NULL) {
@@ -1340,6 +1343,7 @@ static void dom_finish_loading_document(zval *this, zval *return_value, xmlDocPt
RETURN_FALSE;
}
intern->document->doc_props = doc_prop;
+ intern->document->is_html5_class = is_html5_class;
}
php_libxml_increment_node_ptr((php_libxml_node_object *)intern, (xmlNodePtr)newdoc, (void *)intern);
@@ -1352,8 +1356,7 @@ static void dom_finish_loading_document(zval *this, zval *return_value, xmlDocPt
RETURN_TRUE;
}
-/* {{{ static void dom_parse_document(INTERNAL_FUNCTION_PARAMETERS, int mode) */
-static void dom_parse_document(INTERNAL_FUNCTION_PARAMETERS, int mode) {
+void dom_parse_document(INTERNAL_FUNCTION_PARAMETERS, int mode, xmlDocPtr *doc_out) {
char *source;
size_t source_len;
zend_long options = 0;
@@ -1376,17 +1379,18 @@ static void dom_parse_document(INTERNAL_FUNCTION_PARAMETERS, int mode) {
}
xmlDocPtr newdoc = dom_document_parser(ZEND_THIS, mode, source, source_len, options);
+ *doc_out = newdoc;
- dom_finish_loading_document(ZEND_THIS, return_value, newdoc);
+ php_dom_finish_loading_document(ZEND_THIS, return_value, newdoc);
}
-/* }}} end dom_parser_document */
/* {{{ URL: http://www.w3.org/TR/DOM-Level-3-LS/load-save.html#LS-DocumentLS-load
Since: DOM Level 3
*/
PHP_METHOD(DOMDocument, load)
{
- dom_parse_document(INTERNAL_FUNCTION_PARAM_PASSTHRU, DOM_LOAD_FILE);
+ xmlDocPtr unused;
+ dom_parse_document(INTERNAL_FUNCTION_PARAM_PASSTHRU, DOM_LOAD_FILE, &unused);
}
/* }}} end dom_document_load */
@@ -1395,7 +1399,8 @@ Since: DOM Level 3
*/
PHP_METHOD(DOMDocument, loadXML)
{
- dom_parse_document(INTERNAL_FUNCTION_PARAM_PASSTHRU, DOM_LOAD_STRING);
+ xmlDocPtr unused;
+ dom_parse_document(INTERNAL_FUNCTION_PARAM_PASSTHRU, DOM_LOAD_STRING, &unused);
}
/* }}} end dom_document_loadxml */
@@ -1917,7 +1922,7 @@ static void dom_load_html(INTERNAL_FUNCTION_PARAMETERS, int mode) /* {{{ */
xmlDocPtr newdoc = ctxt->myDoc;
htmlFreeParserCtxt(ctxt);
- dom_finish_loading_document(ZEND_THIS, return_value, newdoc);
+ php_dom_finish_loading_document(ZEND_THIS, return_value, newdoc);
}
/* }}} */
diff --git a/ext/dom/dom_ce.h b/ext/dom/dom_ce.h
index b0faf3934df52..399e21d2900ce 100644
--- a/ext/dom/dom_ce.h
+++ b/ext/dom/dom_ce.h
@@ -23,6 +23,7 @@ extern PHP_DOM_EXPORT zend_class_entry *dom_domexception_class_entry;
extern PHP_DOM_EXPORT zend_class_entry *dom_domimplementation_class_entry;
extern PHP_DOM_EXPORT zend_class_entry *dom_documentfragment_class_entry;
extern PHP_DOM_EXPORT zend_class_entry *dom_document_class_entry;
+extern PHP_DOM_EXPORT zend_class_entry *dom_html5_document_class_entry;
extern PHP_DOM_EXPORT zend_class_entry *dom_nodelist_class_entry;
extern PHP_DOM_EXPORT zend_class_entry *dom_namednodemap_class_entry;
extern PHP_DOM_EXPORT zend_class_entry *dom_characterdata_class_entry;
diff --git a/ext/dom/dom_properties.h b/ext/dom/dom_properties.h
index 5116c310570e2..a5a144734e45a 100644
--- a/ext/dom/dom_properties.h
+++ b/ext/dom/dom_properties.h
@@ -61,6 +61,9 @@ zend_result dom_document_recover_write(dom_object *obj, zval *newval);
zend_result dom_document_substitue_entities_read(dom_object *obj, zval *retval);
zend_result dom_document_substitue_entities_write(dom_object *obj, zval *newval);
+/* html5 document properties */
+zend_result dom_html5_document_encoding_write(dom_object *obj, zval *retval);
+
/* documenttype properties */
zend_result dom_documenttype_name_read(dom_object *obj, zval *retval);
zend_result dom_documenttype_entities_read(dom_object *obj, zval *retval);
diff --git a/ext/dom/html5_document.c b/ext/dom/html5_document.c
new file mode 100644
index 0000000000000..9fb4d4411d39f
--- /dev/null
+++ b/ext/dom/html5_document.c
@@ -0,0 +1,1024 @@
+/*
+ +----------------------------------------------------------------------+
+ | Copyright (c) The PHP Group |
+ +----------------------------------------------------------------------+
+ | This source file is subject to version 3.01 of the PHP license, |
+ | that is bundled with this package in the file LICENSE, and is |
+ | available through the world-wide-web at the following url: |
+ | https://www.php.net/license/3_01.txt |
+ | If you did not receive a copy of the PHP license and are unable to |
+ | obtain it through the world-wide-web, please send a note to |
+ | license@php.net so we can mail you a copy immediately. |
+ +----------------------------------------------------------------------+
+ | Authors: Niels Dossche |
+ +----------------------------------------------------------------------+
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "php.h"
+#if defined(HAVE_LIBXML) && defined(HAVE_DOM)
+#include "php_dom.h"
+#include "html5_parser.h"
+#include "html5_serializer.h"
+#include "namespace_compat.h"
+#include
+#include
+#include
+
+/* Implementation defined, but as HTML5 defaults in all other cases to UTF-8, we'll do the same. */
+#define DOM_FALLBACK_ENCODING_NAME "UTF-8"
+#define DOM_FALLBACK_ENCODING_ID LXB_ENCODING_UTF_8
+
+/* Remembers the last computed line/column position so that a later lookup can continue scanning
+ * from last_offset instead of starting over. */
+typedef struct {
+ /* Line number at last_offset; reset to 1 by dom_reset_line_column_cache() */
+ size_t last_line;
+ /* Column number at last_offset; reset to 1 by dom_reset_line_column_cache() */
+ size_t last_column;
+ /* Index into the current input buffer up to which line/column have been counted */
+ size_t last_offset;
+} dom_line_column_cache;
+
+/* Data handed to the error reporters through the parse context's application_data pointer. */
+typedef struct {
+ /* Name of the input, printed in the error messages */
+ const char *input_name;
+ /* Current input as code points; when this is NULL, current_input_characters is used instead */
+ const lxb_codepoint_t *current_input_codepoints;
+ /* Current input as bytes, only consulted when current_input_codepoints is NULL */
+ const char *current_input_characters;
+ /* Length of the current input buffer; offsets beyond it are clamped to it */
+ size_t current_input_length;
+ /* Sum of the input lengths of all chunks processed so far; subtracted from reported offsets */
+ size_t current_total_offset;
+ /* Line/column cache used for tokenizer errors */
+ dom_line_column_cache cache_tokenizer;
+} dom_lexbor_libxml2_bridge_application_data;
+
+/* Result of the encoding detection performed by dom_determine_encoding(). */
+typedef struct {
+ /* Detected encoding; callers must handle NULL (dom_setup_parser_encoding() substitutes the fallback encoding) */
+ const lxb_encoding_data_t *encoding_data;
+ /* Number of leading bytes occupied by a byte order mark, 0 if none was found */
+ size_t bom_shift;
+} dom_character_encoding_data;
+
+/* Output callback: receives an opaque pointer, a buffer and its length, and reports success or failure.
+ * NOTE(review): presumably the opaque pointer is dom_output_ctx::output_data -- confirm at the call sites. */
+typedef zend_result (*dom_write_output)(void*, const char *, size_t);
+
+/* Bundle of encoder/decoder state, scratch buffers and an output callback.
+ * NOTE(review): the users of this structure are outside the visible part of the file; presumably it
+ * drives re-encoding of serialized output -- confirm against the serialization code. */
+typedef struct {
+ const lxb_encoding_data_t *encoding_data;
+ const lxb_encoding_data_t *decoding_data;
+ lxb_encoding_encode_t *encode;
+ lxb_encoding_decode_t *decode;
+ lxb_codepoint_t *codepoints;
+ lxb_char_t *encoding_output;
+ /* Opaque data paired with write_output */
+ void *output_data;
+ dom_write_output write_output;
+} dom_output_ctx;
+
+/* State for converting the input from its detected encoding into UTF-8 before it is handed to the parser.
+ * The encode side is set up by dom_decoding_encoding_ctx_init(), the decode side by dom_setup_parser_encoding(). */
+typedef struct {
+ /* We can skip some conversion if the input and output encoding are both UTF-8, we only have to validate and substitute replacement characters */
+ bool fast_path; /* Put first, close to the encode & decode structures, for cache locality */
+ lxb_encoding_encode_t encode;
+ lxb_encoding_decode_t decode;
+ /* Output encoding, always UTF-8 */
+ const lxb_encoding_data_t *encode_data;
+ /* Input encoding, NULL until dom_setup_parser_encoding() ran */
+ const lxb_encoding_data_t *decode_data;
+ /* Buffer the encoder writes UTF-8 bytes into */
+ lxb_char_t encoding_output[4096];
+ /* Buffer the decoder writes code points into */
+ lxb_codepoint_t codepoints[4096];
+} dom_decoding_encoding_ctx;
+
+/* Prepare a conversion context: the output side is fixed to UTF-8, the input side is left unset
+ * until the input encoding is known. */
+static void dom_decoding_encoding_ctx_init(dom_decoding_encoding_ctx *ctx)
+{
+	const lxb_encoding_data_t *utf8 = lxb_encoding_data(LXB_ENCODING_UTF_8);
+	const size_t output_capacity = sizeof(ctx->encoding_output) / sizeof(ctx->encoding_output[0]);
+
+	/* The fast path is on by default so that finishing skips the decoder if it was never initialised properly. */
+	ctx->fast_path = true;
+	ctx->decode_data = NULL;
+	ctx->encode_data = utf8;
+
+	(void) lxb_encoding_encode_init(&ctx->encode, utf8, ctx->encoding_output, output_capacity);
+	(void) lxb_encoding_encode_replace_set(&ctx->encode, LXB_ENCODING_REPLACEMENT_BYTES, LXB_ENCODING_REPLACEMENT_SIZE);
+}
+
+/* Map a Lexbor tokenizer error identifier to the error name printed in tokenizer error messages.
+ * Identifiers without a mapping yield "unknown error". */
+static const char *dom_lexbor_tokenizer_error_code_to_string(lxb_html_tokenizer_error_id_t id)
+{
+	const char *name;
+
+	switch (id) {
+		case LXB_HTML_TOKENIZER_ERROR_ABCLOFEMCO: name = "abrupt-closing-of-empty-comment"; break;
+		case LXB_HTML_TOKENIZER_ERROR_ABDOPUID: name = "abrupt-doctype-public-identifier"; break;
+		case LXB_HTML_TOKENIZER_ERROR_ABDOSYID: name = "abrupt-doctype-system-identifier"; break;
+		case LXB_HTML_TOKENIZER_ERROR_ABOFDIINNUCHRE: name = "absence-of-digits-in-numeric-character-reference"; break;
+		case LXB_HTML_TOKENIZER_ERROR_CDINHTCO: name = "cdata-in-html-content"; break;
+		case LXB_HTML_TOKENIZER_ERROR_CHREOUUNRA: name = "character-reference-outside-unicode-range"; break;
+		case LXB_HTML_TOKENIZER_ERROR_COCHININST: name = "control-character-in-input-stream"; break;
+		case LXB_HTML_TOKENIZER_ERROR_COCHRE: name = "control-character-reference"; break;
+		case LXB_HTML_TOKENIZER_ERROR_ENTAWIAT: name = "end-tag-with-attributes"; break;
+		case LXB_HTML_TOKENIZER_ERROR_DUAT: name = "duplicate-attribute"; break;
+		case LXB_HTML_TOKENIZER_ERROR_ENTAWITRSO: name = "end-tag-with-trailing-solidus"; break;
+		case LXB_HTML_TOKENIZER_ERROR_EOBETANA: name = "eof-before-tag-name"; break;
+		case LXB_HTML_TOKENIZER_ERROR_EOINCD: name = "eof-in-cdata"; break;
+		case LXB_HTML_TOKENIZER_ERROR_EOINCO: name = "eof-in-comment"; break;
+		case LXB_HTML_TOKENIZER_ERROR_EOINDO: name = "eof-in-doctype"; break;
+		case LXB_HTML_TOKENIZER_ERROR_EOINSCHTCOLITE: name = "eof-in-script-html-comment-like-text"; break;
+		case LXB_HTML_TOKENIZER_ERROR_EOINTA: name = "eof-in-tag"; break;
+		case LXB_HTML_TOKENIZER_ERROR_INCLCO: name = "incorrectly-closed-comment"; break;
+		case LXB_HTML_TOKENIZER_ERROR_INOPCO: name = "incorrectly-opened-comment"; break;
+		case LXB_HTML_TOKENIZER_ERROR_INCHSEAFDONA: name = "invalid-character-sequence-after-doctype-name"; break;
+		case LXB_HTML_TOKENIZER_ERROR_INFICHOFTANA: name = "invalid-first-character-of-tag-name"; break;
+		case LXB_HTML_TOKENIZER_ERROR_MIATVA: name = "missing-attribute-value"; break;
+		case LXB_HTML_TOKENIZER_ERROR_MIDONA: name = "missing-doctype-name"; break;
+		case LXB_HTML_TOKENIZER_ERROR_MIDOPUID: name = "missing-doctype-public-identifier"; break;
+		case LXB_HTML_TOKENIZER_ERROR_MIDOSYID: name = "missing-doctype-system-identifier"; break;
+		case LXB_HTML_TOKENIZER_ERROR_MIENTANA: name = "missing-end-tag-name"; break;
+		case LXB_HTML_TOKENIZER_ERROR_MIQUBEDOPUID: name = "missing-quote-before-doctype-public-identifier"; break;
+		case LXB_HTML_TOKENIZER_ERROR_MIQUBEDOSYID: name = "missing-quote-before-doctype-system-identifier"; break;
+		case LXB_HTML_TOKENIZER_ERROR_MISEAFCHRE: name = "missing-semicolon-after-character-reference"; break;
+		case LXB_HTML_TOKENIZER_ERROR_MIWHAFDOPUKE: name = "missing-whitespace-after-doctype-public-keyword"; break;
+		case LXB_HTML_TOKENIZER_ERROR_MIWHAFDOSYKE: name = "missing-whitespace-after-doctype-system-keyword"; break;
+		case LXB_HTML_TOKENIZER_ERROR_MIWHBEDONA: name = "missing-whitespace-before-doctype-name"; break;
+		case LXB_HTML_TOKENIZER_ERROR_MIWHBEAT: name = "missing-whitespace-between-attributes"; break;
+		case LXB_HTML_TOKENIZER_ERROR_MIWHBEDOPUANSYID: name = "missing-whitespace-between-doctype-public-and-system-identifiers"; break;
+		case LXB_HTML_TOKENIZER_ERROR_NECO: name = "nested-comment"; break;
+		case LXB_HTML_TOKENIZER_ERROR_NOCHRE: name = "noncharacter-character-reference"; break;
+		case LXB_HTML_TOKENIZER_ERROR_NOININST: name = "noncharacter-in-input-stream"; break;
+		case LXB_HTML_TOKENIZER_ERROR_NOVOHTELSTTAWITRSO: name = "non-void-html-element-start-tag-with-trailing-solidus"; break;
+		case LXB_HTML_TOKENIZER_ERROR_NUCHRE: name = "null-character-reference"; break;
+		case LXB_HTML_TOKENIZER_ERROR_SUCHRE: name = "surrogate-character-reference"; break;
+		case LXB_HTML_TOKENIZER_ERROR_SUININST: name = "surrogate-in-input-stream"; break;
+		case LXB_HTML_TOKENIZER_ERROR_UNCHAFDOSYID: name = "unexpected-character-after-doctype-system-identifier"; break;
+		case LXB_HTML_TOKENIZER_ERROR_UNCHINATNA: name = "unexpected-character-in-attribute-name"; break;
+		case LXB_HTML_TOKENIZER_ERROR_UNCHINUNATVA: name = "unexpected-character-in-unquoted-attribute-value"; break;
+		case LXB_HTML_TOKENIZER_ERROR_UNEQSIBEATNA: name = "unexpected-equals-sign-before-attribute-name"; break;
+		case LXB_HTML_TOKENIZER_ERROR_UNNUCH: name = "unexpected-null-character"; break;
+		case LXB_HTML_TOKENIZER_ERROR_UNQUMAINOFTANA: name = "unexpected-question-mark-instead-of-tag-name"; break;
+		case LXB_HTML_TOKENIZER_ERROR_UNSOINTA: name = "unexpected-solidus-in-tag"; break;
+		case LXB_HTML_TOKENIZER_ERROR_UNNACHRE: name = "unknown-named-character-reference"; break;
+		default: name = "unknown error"; break;
+	}
+
+	return name;
+}
+
+/* Map a Lexbor tree-construction error identifier to the error name printed in tree error messages.
+ * Identifiers without a mapping yield "unknown error". */
+static const char *dom_lexbor_tree_error_code_to_string(lxb_html_tree_error_id_t id)
+{
+	switch (id) {
+		case LXB_HTML_RULES_ERROR_UNTO: return "unexpected-token";
+		case LXB_HTML_RULES_ERROR_UNCLTO: return "unexpected-closed-token";
+		case LXB_HTML_RULES_ERROR_NUCH: return "null-character";
+		case LXB_HTML_RULES_ERROR_UNCHTO: return "unexpected-character-token";
+		case LXB_HTML_RULES_ERROR_UNTOININMO: return "unexpected-token-in-initial-mode";
+		case LXB_HTML_RULES_ERROR_BADOTOININMO: return "bad-doctype-token-in-initial-mode";
+		case LXB_HTML_RULES_ERROR_DOTOINBEHTMO: return "doctype-token-in-before-html-mode";
+		case LXB_HTML_RULES_ERROR_UNCLTOINBEHTMO: return "unexpected-closed-token-in-before-html-mode";
+		case LXB_HTML_RULES_ERROR_DOTOINBEHEMO: return "doctype-token-in-before-head-mode";
+		/* Hyphenated like every other name (this one used to contain a stray underscore) */
+		case LXB_HTML_RULES_ERROR_UNCLTOINBEHEMO: return "unexpected-closed-token-in-before-head-mode";
+		case LXB_HTML_RULES_ERROR_DOTOINHEMO: return "doctype-token-in-head-mode";
+		case LXB_HTML_RULES_ERROR_NOVOHTELSTTAWITRSO: return "non-void-html-element-start-tag-with-trailing-solidus";
+		case LXB_HTML_RULES_ERROR_HETOINHEMO: return "head-token-in-head-mode";
+		case LXB_HTML_RULES_ERROR_UNCLTOINHEMO: return "unexpected-closed-token-in-head-mode";
+		case LXB_HTML_RULES_ERROR_TECLTOWIOPINHEMO: return "template-closed-token-without-opening-in-head-mode";
+		case LXB_HTML_RULES_ERROR_TEELISNOCUINHEMO: return "template-element-is-not-current-in-head-mode";
+		case LXB_HTML_RULES_ERROR_DOTOINHENOMO: return "doctype-token-in-head-noscript-mode";
+		case LXB_HTML_RULES_ERROR_DOTOAFHEMO: return "doctype-token-after-head-mode";
+		case LXB_HTML_RULES_ERROR_HETOAFHEMO: return "head-token-after-head-mode";
+		case LXB_HTML_RULES_ERROR_DOTOINBOMO: return "doctype-token-in-body-mode";
+		case LXB_HTML_RULES_ERROR_BAENOPELISWR: return "bad-ending-open-elements-is-wrong";
+		case LXB_HTML_RULES_ERROR_OPELISWR: return "open-elements-is-wrong";
+		case LXB_HTML_RULES_ERROR_UNELINOPELST: return "unexpected-element-in-open-elements-stack";
+		case LXB_HTML_RULES_ERROR_MIELINOPELST: return "missing-element-in-open-elements-stack";
+		case LXB_HTML_RULES_ERROR_NOBOELINSC: return "no-body-element-in-scope";
+		case LXB_HTML_RULES_ERROR_MIELINSC: return "missing-element-in-scope";
+		case LXB_HTML_RULES_ERROR_UNELINSC: return "unexpected-element-in-scope";
+		case LXB_HTML_RULES_ERROR_UNELINACFOST: return "unexpected-element-in-active-formatting-stack";
+		case LXB_HTML_RULES_ERROR_UNENOFFI: return "unexpected-end-of-file";
+		case LXB_HTML_RULES_ERROR_CHINTATE: return "characters-in-table-text";
+		case LXB_HTML_RULES_ERROR_DOTOINTAMO: return "doctype-token-in-table-mode";
+		case LXB_HTML_RULES_ERROR_DOTOINSEMO: return "doctype-token-in-select-mode";
+		case LXB_HTML_RULES_ERROR_DOTOAFBOMO: return "doctype-token-after-body-mode";
+		case LXB_HTML_RULES_ERROR_DOTOINFRMO: return "doctype-token-in-frameset-mode";
+		case LXB_HTML_RULES_ERROR_DOTOAFFRMO: return "doctype-token-after-frameset-mode";
+		case LXB_HTML_RULES_ERROR_DOTOFOCOMO: return "doctype-token-foreign-content-mode";
+		default: return "unknown error";
+	}
+}
+
+/* Describe a failure status of the Lexbor-to-libxml2 bridge in words; statuses without a
+ * dedicated description yield "unknown error". */
+static const char *dom_lexbor_libxml2_bridge_status_code_to_string(lexbor_libxml2_bridge_status status)
+{
+	if (status == LEXBOR_LIBXML2_BRIDGE_STATUS_CANNOT_INIT) {
+		return "cannot initialize data structures";
+	}
+	if (status == LEXBOR_LIBXML2_BRIDGE_STATUS_FATAL_PARSE) {
+		return "fatal error in parsing";
+	}
+	if (status == LEXBOR_LIBXML2_BRIDGE_STATUS_OVERFLOW) {
+		return "string length overflow";
+	}
+	if (status == LEXBOR_LIBXML2_BRIDGE_STATUS_OOM) {
+		return "out of memory";
+	}
+	return "unknown error";
+}
+
+/* Put the cache back at the start of the input: line 1, column 1, offset 0. */
+static void dom_reset_line_column_cache(dom_line_column_cache *cache)
+{
+	*cache = (dom_line_column_cache) {
+		.last_line = 1,
+		.last_column = 1,
+		.last_offset = 0,
+	};
+}
+
+/* Advance the cached line/column position up to the given absolute offset.
+ * The offset is first made relative to the current input buffer and clamped to its length;
+ * scanning then resumes from cache->last_offset. */
+static void dom_find_line_and_column_using_cache(const dom_lexbor_libxml2_bridge_application_data *application_data, dom_line_column_cache *cache, size_t offset)
+{
+	offset -= application_data->current_total_offset;
+	if (offset > application_data->current_input_length) {
+		/* Possible with empty input, also just good for general safety */
+		offset = application_data->current_input_length;
+	}
+
+	/* The input is either a buffer of code points or a buffer of UTF-8 bytes */
+	const lxb_codepoint_t *codepoints = application_data->current_input_codepoints;
+	if (codepoints != NULL) {
+		for (; cache->last_offset < offset; cache->last_offset++) {
+			if (codepoints[cache->last_offset] == 0x000A) {
+				cache->last_line++;
+				cache->last_column = 1;
+			} else {
+				cache->last_column++;
+			}
+		}
+		return;
+	}
+
+	const char *bytes = application_data->current_input_characters;
+	for (; cache->last_offset < offset; cache->last_offset++) {
+		const lxb_char_t current = bytes[cache->last_offset];
+		if (current == '\n') {
+			cache->last_line++;
+			cache->last_column = 1;
+		} else if ((current & 0xC0) != 0x80) {
+			/* See Lexbor tokenizer patch
+			 * Only bytes that are not UTF-8 continuation bytes (10xxxxxx) start a new column.
+			 * Note for future self: branchlessly computing the length and jumping by the length would be nice,
+			 * however it takes so many instructions to do so that it is slower than this naive method. */
+			cache->last_column++;
+		}
+	}
+}
+
+/* Tokenizer error callback: resolve the error offset to a line and column and report it as a libxml error. */
+static void dom_lexbor_libxml2_bridge_tokenizer_error_reporter(void *application_data_voidptr, lxb_html_tokenizer_error_t *error, size_t offset)
+{
+	dom_lexbor_libxml2_bridge_application_data *data = application_data_voidptr;
+	dom_line_column_cache *position = &data->cache_tokenizer;
+
+	dom_find_line_and_column_using_cache(data, position, offset);
+
+	const char *error_name = dom_lexbor_tokenizer_error_code_to_string(error->id);
+	php_libxml_pretend_ctx_error_ex(position->last_line, position->last_column, "tokenizer error %s in %s, line: %zu, column: %zu\n", error_name, data->input_name, position->last_line, position->last_column);
+}
+
+/* Tree-construction error callback: report the error as a libxml error, showing a column range
+ * when the offending token spans more than one column. */
+static void dom_lexbor_libxml2_bridge_tree_error_reporter(void *application_data_voidptr, lxb_html_tree_error_t *error, size_t line, size_t column, size_t len)
+{
+	dom_lexbor_libxml2_bridge_application_data *data = application_data_voidptr;
+	const char *error_name = dom_lexbor_tree_error_code_to_string(error->id);
+
+	if (EXPECTED(len > 1)) {
+		php_libxml_pretend_ctx_error_ex(line, column, "tree error %s in %s, line: %zu, column: %zu-%zu\n", error_name, data->input_name, line, column, column + len - 1);
+		return;
+	}
+
+	/* Possible with EOF, or single-character tokens, don't use a range in the error display in this case */
+	php_libxml_pretend_ctx_error_ex(line, column, "tree error %s in %s, line: %zu, column: %zu\n", error_name, data->input_name, line, column);
+}
+
+/* Return the first direct child element of parent whose name equals searching_for, or NULL if there is none. */
+static xmlNodePtr dom_search_child(xmlNodePtr parent, const char *searching_for)
+{
+	for (xmlNodePtr candidate = parent->children; candidate != NULL; candidate = candidate->next) {
+		if (candidate->type != XML_ELEMENT_NODE) {
+			continue;
+		}
+		if (strcmp((const char *) candidate->name, searching_for) == 0) {
+			return candidate;
+		}
+	}
+	return NULL;
+}
+
+/* Remove the first child element of parent named searching_for, re-attaching its children to parent.
+ * Does nothing when no such child exists. */
+static void dom_place_remove_element_and_hoist_children(xmlNodePtr parent, const char *searching_for)
+{
+	xmlNodePtr removed = dom_search_child(parent, searching_for);
+	if (removed == NULL) {
+		return;
+	}
+
+	xmlUnlinkNode(removed);
+
+	/* Always re-read the first remaining child: each iteration detaches it from the removed element */
+	xmlNodePtr moved;
+	while ((moved = removed->children) != NULL) {
+		xmlUnlinkNode(moved);
+		xmlAddChild(parent, moved);
+	}
+
+	xmlFreeNode(removed);
+}
+
+/* When HTML_PARSE_NOIMPLIED is set, remove the html, head and body elements that were not explicitly
+ * present in the input (according to observations), keeping their children in the document. */
+static void dom_post_process_html5_loading(xmlDocPtr lxml_doc, zend_long options, const lexbor_libxml2_bridge_extracted_observations *observations)
+{
+	if (!(options & HTML_PARSE_NOIMPLIED)) {
+		return;
+	}
+
+	xmlNodePtr document_node = (xmlNodePtr) lxml_doc;
+	xmlNodePtr html_node = dom_search_child(document_node, "html");
+
+	if (!observations->has_explicit_head_tag) {
+		dom_place_remove_element_and_hoist_children(html_node, "head");
+	}
+	if (!observations->has_explicit_body_tag) {
+		dom_place_remove_element_and_hoist_children(html_node, "body");
+	}
+	if (observations->has_explicit_html_tag) {
+		return;
+	}
+
+	/* The HTML node has a single namespace declaration, that we must preserve after removing the node.
+	 * However, it's possible the namespace is NULL if DOM\HTML_NO_DEFAULT_NS was set. */
+	const bool has_default_ns = !(options & DOM_HTML_NO_DEFAULT_NS);
+	if (has_default_ns) {
+		php_libxml_set_old_ns(lxml_doc, html_node->nsDef);
+		html_node->nsDef = NULL;
+	}
+	dom_place_remove_element_and_hoist_children(document_node, "html");
+	if (has_default_ns && EXPECTED(lxml_doc->children != NULL)) {
+		dom_reconcile_ns_list(lxml_doc, lxml_doc->children, lxml_doc->last);
+	}
+}
+
+/* https://html.spec.whatwg.org/multipage/parsing.html#determining-the-character-encoding */
+/* Determine the encoding of the input.
+ * A UTF-8 / UTF-16BE / UTF-16LE byte order mark takes precedence and sets bom_shift to its length.
+ * Otherwise at most the first 1024 bytes are prescanned with lxb_html_encoding_determine(); if that
+ * yields no entry, or anything fails, the fallback encoding (DOM_FALLBACK_ENCODING_ID) is returned.
+ * The returned encoding_data may be NULL when the prescanned name is not known to Lexbor. */
+static dom_character_encoding_data dom_determine_encoding(const char *source, size_t source_len)
+{
+	dom_character_encoding_data result;
+
+	/* BOM sniffing */
+	if (source_len >= 3 && source[0] == '\xEF' && source[1] == '\xBB' && source[2] == '\xBF') {
+		result.encoding_data = lxb_encoding_data(LXB_ENCODING_UTF_8);
+		result.bom_shift = 3;
+		return result;
+	} else if (source_len >= 2) {
+		if (source[0] == '\xFE' && source[1] == '\xFF') {
+			result.encoding_data = lxb_encoding_data(LXB_ENCODING_UTF_16BE);
+			result.bom_shift = 2;
+			return result;
+		} else if (source[0] == '\xFF' && source[1] == '\xFE') {
+			result.encoding_data = lxb_encoding_data(LXB_ENCODING_UTF_16LE);
+			result.bom_shift = 2;
+			return result;
+		}
+	}
+
+	/* Perform prescan */
+	lxb_html_encoding_t encoding;
+	lxb_status_t status = lxb_html_encoding_init(&encoding);
+	if (status != LXB_STATUS_OK) {
+		/* The structure was not successfully initialised, so it must not be passed to the destroy function */
+		goto fallback_uninit;
+	}
+	/* This is the "wait either for 1024 bytes or 500ms" part */
+	if (source_len > 1024) {
+		source_len = 1024;
+	}
+	status = lxb_html_encoding_determine(&encoding, (const lxb_char_t *) source, (const lxb_char_t *) source + source_len);
+	if (status != LXB_STATUS_OK) {
+		goto fallback;
+	}
+	lxb_html_encoding_entry_t *entry = lxb_html_encoding_meta_entry(&encoding, 0);
+	if (entry == NULL) {
+		goto fallback;
+	}
+	result.encoding_data = lxb_encoding_data_by_pre_name(entry->name, entry->end - entry->name);
+	result.bom_shift = 0;
+	lxb_html_encoding_destroy(&encoding, false);
+	return result;
+
+fallback:
+	lxb_html_encoding_destroy(&encoding, false);
+fallback_uninit:
+	result.encoding_data = lxb_encoding_data(DOM_FALLBACK_ENCODING_ID);
+	result.bom_shift = 0;
+	return result;
+}
+
+/* Detect the input encoding, advance *buf_ref / shrink *read past a byte order mark, and initialise
+ * the decoder of the conversion context. The fast path is enabled when the input is already UTF-8. */
+static void dom_setup_parser_encoding(const lxb_char_t **buf_ref, size_t *read, dom_decoding_encoding_ctx *decoding_encoding_ctx)
+{
+	static const lxb_codepoint_t replacement_codepoint = LXB_ENCODING_REPLACEMENT_CODEPOINT;
+
+	const dom_character_encoding_data detected = dom_determine_encoding((const char *) *buf_ref, *read);
+	*buf_ref += detected.bom_shift;
+	*read -= detected.bom_shift;
+
+	/* An unknown encoding name results in NULL, use the fallback encoding in that case */
+	const lxb_encoding_data_t *decode_data = detected.encoding_data;
+	if (decode_data == NULL) {
+		decode_data = lxb_encoding_data(DOM_FALLBACK_ENCODING_ID);
+		ZEND_ASSERT(decode_data != NULL);
+	}
+	decoding_encoding_ctx->decode_data = decode_data;
+
+	const size_t codepoint_capacity = sizeof(decoding_encoding_ctx->codepoints) / sizeof(decoding_encoding_ctx->codepoints[0]);
+	(void) lxb_encoding_decode_init(&decoding_encoding_ctx->decode, decode_data, decoding_encoding_ctx->codepoints, codepoint_capacity);
+	(void) lxb_encoding_decode_replace_set(&decoding_encoding_ctx->decode, &replacement_codepoint, LXB_ENCODING_REPLACEMENT_BUFFER_LEN);
+
+	/* Note: encode_data is for UTF-8 */
+	decoding_encoding_ctx->fast_path = (decode_data == decoding_encoding_ctx->encode_data);
+}
+
+/* Feed one chunk of bytes to the Lexbor parser, report the errors it produced and advance the offset bookkeeping.
+ * encoding_output / encoded_length: the bytes handed to the parser.
+ * input_buffer_length: length of the corresponding input, used for the line/column bookkeeping.
+ * Returns false when lxb_html_document_parse_chunk() does not return LXB_STATUS_OK, true otherwise. */
+static bool dom_process_parse_chunk(lexbor_libxml2_bridge_parse_context *ctx, lxb_html_document_t *document, lxb_html_parser_t *parser, size_t encoded_length, const lxb_char_t *encoding_output, size_t input_buffer_length, size_t *tokenizer_error_offset, size_t *tree_error_offset)
+{
+ dom_lexbor_libxml2_bridge_application_data *application_data = ctx->application_data;
+ /* Must be set before errors are reported: the line/column lookup clamps offsets to this length */
+ application_data->current_input_length = input_buffer_length;
+ lexbor_status_t lexbor_status = lxb_html_document_parse_chunk(document, encoding_output, encoded_length);
+ if (UNEXPECTED(lexbor_status != LXB_STATUS_OK)) {
+ return false;
+ }
+ lexbor_libxml2_bridge_report_errors(ctx, parser, encoding_output, application_data->current_total_offset, tokenizer_error_offset, tree_error_offset);
+ /* Count lines and columns up to the end of this chunk so the next chunk continues from the right position */
+ dom_find_line_and_column_using_cache(application_data, &application_data->cache_tokenizer, application_data->current_total_offset + input_buffer_length);
+ application_data->current_total_offset += input_buffer_length;
+ /* last_offset is relative to the current input buffer, so it restarts at 0 for the next chunk */
+ application_data->cache_tokenizer.last_offset = 0;
+ return true;
+}
+
+/* Fast path: the input is passed to the parser as-is; it is decoded sequence by sequence only to find
+ * invalid data, which is skipped and substituted by the replacement bytes.
+ * *buf_ref_ref is updated to the position reached, on success and on failure.
+ * Returns false when processing a chunk fails, true otherwise. */
+static bool dom_decode_encode_fast_path(lexbor_libxml2_bridge_parse_context *ctx, lxb_html_document_t *document, lxb_html_parser_t *parser, const lxb_char_t **buf_ref_ref, const lxb_char_t *buf_end, dom_decoding_encoding_ctx *decoding_encoding_ctx, size_t *tokenizer_error_offset, size_t *tree_error_offset)
+{
+ const lxb_char_t *buf_ref = *buf_ref_ref;
+ /* Start of the input that has not been handed to the parser yet */
+ const lxb_char_t *last_output = buf_ref;
+ while (buf_ref != buf_end) {
+ const lxb_char_t *buf_ref_backup = buf_ref;
+ lxb_codepoint_t codepoint = decoding_encoding_ctx->decode_data->decode_single(&decoding_encoding_ctx->decode, &buf_ref, buf_end);
+ if (UNEXPECTED(codepoint > LXB_ENCODING_MAX_CODEPOINT)) {
+ size_t skip = buf_ref - buf_ref_backup; /* Skip invalid data, it's replaced by the UTF-8 replacement bytes */
+ /* Parse the pending bytes without the invalid tail, but account for the whole consumed input length */
+ if (!dom_process_parse_chunk(ctx, document, parser, buf_ref - last_output - skip, last_output, buf_ref - last_output, tokenizer_error_offset, tree_error_offset)) {
+ goto fail_oom;
+ }
+ /* The replacement bytes do not correspond to any input, hence an input length of 0 */
+ if (!dom_process_parse_chunk(ctx, document, parser, LXB_ENCODING_REPLACEMENT_SIZE, LXB_ENCODING_REPLACEMENT_BYTES, 0, tokenizer_error_offset, tree_error_offset)) {
+ goto fail_oom;
+ }
+ last_output = buf_ref;
+ }
+ }
+ /* Hand over whatever is still pending after the last invalid sequence */
+ if (buf_ref != last_output && !dom_process_parse_chunk(ctx, document, parser, buf_ref - last_output, last_output, buf_ref - last_output, tokenizer_error_offset, tree_error_offset)) {
+ goto fail_oom;
+ }
+ *buf_ref_ref = buf_ref;
+ return true;
+fail_oom:
+ *buf_ref_ref = buf_ref;
+ return false;
+}
+
+/* Slow path: decode the input into the code point buffer, re-encode those code points into the
+ * output buffer and hand the encoded bytes to the parser.
+ * Both loops repeat while the respective call returns LXB_STATUS_SMALL_BUFFER.
+ * *buf_ref_ref is updated to the position reached, on success and on failure.
+ * Returns false when processing a chunk fails, true otherwise. */
+static bool dom_decode_encode_slow_path(lexbor_libxml2_bridge_parse_context *ctx, lxb_html_document_t *document, lxb_html_parser_t *parser, const lxb_char_t **buf_ref_ref, const lxb_char_t *buf_end, dom_decoding_encoding_ctx *decoding_encoding_ctx, size_t *tokenizer_error_offset, size_t *tree_error_offset)
+{
+ const lxb_char_t *buf_ref = *buf_ref_ref;
+ lexbor_status_t decode_status, encode_status;
+ do {
+ decode_status = decoding_encoding_ctx->decode_data->decode(&decoding_encoding_ctx->decode, &buf_ref, buf_end);
+
+ const lxb_codepoint_t *codepoints_ref = (const lxb_codepoint_t *) decoding_encoding_ctx->codepoints;
+ size_t decoding_buffer_used = lxb_encoding_decode_buf_used(&decoding_encoding_ctx->decode);
+ const lxb_codepoint_t *codepoints_end = decoding_encoding_ctx->codepoints + decoding_buffer_used;
+ do {
+ encode_status = decoding_encoding_ctx->encode_data->encode(&decoding_encoding_ctx->encode, &codepoints_ref, codepoints_end);
+ ZEND_ASSERT(encode_status != LXB_STATUS_ERROR && "parameters and replacements should be valid");
+ /* NOTE(review): decoding_buffer_used is passed as the input length on every iteration of this inner loop,
+ * so the offset bookkeeping advances by it each time the encoder needs another round -- confirm this is intended. */
+ if (!dom_process_parse_chunk(ctx, document, parser, lxb_encoding_encode_buf_used(&decoding_encoding_ctx->encode), decoding_encoding_ctx->encoding_output, decoding_buffer_used, tokenizer_error_offset, tree_error_offset)) {
+ goto fail_oom;
+ }
+ /* The encoded bytes were consumed, start writing at the beginning of the output buffer again */
+ lxb_encoding_encode_buf_used_set(&decoding_encoding_ctx->encode, 0);
+ } while (encode_status == LXB_STATUS_SMALL_BUFFER);
+ /* All decoded code points were encoded, the code point buffer can be reused */
+ lxb_encoding_decode_buf_used_set(&decoding_encoding_ctx->decode, 0);
+ } while (decode_status == LXB_STATUS_SMALL_BUFFER);
+ *buf_ref_ref = buf_ref;
+ return true;
+fail_oom:
+ *buf_ref_ref = buf_ref;
+ return false;
+}
+
+/* Process the input between *buf_ref_ref and buf_end, using the fast or the slow conversion path
+ * depending on the conversion context. Returns the result of the chosen path. */
+static bool dom_parse_decode_encode_step(lexbor_libxml2_bridge_parse_context *ctx, lxb_html_document_t *document, lxb_html_parser_t *parser, const lxb_char_t **buf_ref_ref, const lxb_char_t *buf_end, dom_decoding_encoding_ctx *decoding_encoding_ctx, size_t *tokenizer_error_offset, size_t *tree_error_offset)
+{
+	if (!decoding_encoding_ctx->fast_path) {
+		return dom_decode_encode_slow_path(ctx, document, parser, buf_ref_ref, buf_end, decoding_encoding_ctx, tokenizer_error_offset, tree_error_offset);
+	}
+	return dom_decode_encode_fast_path(ctx, document, parser, buf_ref_ref, buf_end, decoding_encoding_ctx, tokenizer_error_offset, tree_error_offset);
+}
+
+/* Finish the decoder (slow path only) and the encoder, handing any output this still produces to the parser.
+ * Returns false when processing a chunk fails, true otherwise. */
+static bool dom_parse_decode_encode_finish(lexbor_libxml2_bridge_parse_context *ctx, lxb_html_document_t *document, lxb_html_parser_t *parser, dom_decoding_encoding_ctx *decoding_encoding_ctx, size_t *tokenizer_error_offset, size_t *tree_error_offset)
+{
+ if (!decoding_encoding_ctx->fast_path) {
+ /* Fast path handles codepoints one by one, so this part is not applicable in that case */
+ (void) lxb_encoding_decode_finish(&decoding_encoding_ctx->decode);
+ size_t decoding_buffer_size = lxb_encoding_decode_buf_used(&decoding_encoding_ctx->decode);
+ if (decoding_buffer_size > 0) {
+ /* Finishing the decoder left code points in the buffer: encode them and pass them on */
+ const lxb_codepoint_t *codepoints_ref = (const lxb_codepoint_t *) decoding_encoding_ctx->codepoints;
+ const lxb_codepoint_t *codepoints_end = codepoints_ref + decoding_buffer_size;
+ (void) decoding_encoding_ctx->encode_data->encode(&decoding_encoding_ctx->encode, &codepoints_ref, codepoints_end);
+ if (!dom_process_parse_chunk(ctx, document, parser, lxb_encoding_encode_buf_used(&decoding_encoding_ctx->encode), decoding_encoding_ctx->encoding_output, decoding_buffer_size, tokenizer_error_offset, tree_error_offset)) {
+ return false;
+ }
+ }
+ }
+ (void) lxb_encoding_encode_finish(&decoding_encoding_ctx->encode);
+ /* NOTE(review): the encoder's used count is not reset after the chunk above, and on the fast path the decoder
+ * whose used count is read here is only initialised by dom_setup_parser_encoding() -- confirm both are as intended. */
+ if (lxb_encoding_encode_buf_used(&decoding_encoding_ctx->encode) && !dom_process_parse_chunk(ctx, document, parser, lxb_encoding_encode_buf_used(&decoding_encoding_ctx->encode), decoding_encoding_ctx->encoding_output, lxb_encoding_decode_buf_used(&decoding_encoding_ctx->decode), tokenizer_error_offset, tree_error_offset)) {
+ return false;
+ }
+ return true;
+}
+
+/* Check that the options bitmask contains only supported flags.
+ * Raises an argument value error for argument 2 and returns false when an unsupported flag is set,
+ * returns true otherwise. */
+static bool check_options_validity(zend_long options)
+{
+	const zend_long VALID_OPTIONS = XML_PARSE_NOERROR | XML_PARSE_COMPACT | HTML_PARSE_NOIMPLIED | DOM_HTML_NO_DEFAULT_NS;
+	if ((options & ~VALID_OPTIONS) != 0) {
+		/* The last flag is named after DOM_HTML_NO_DEFAULT_NS, i.e. DOM\HTML_NO_DEFAULT_NS */
+		zend_argument_value_error(2, "contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\\HTML_NO_DEFAULT_NS)");
+		return false;
+	}
+	return true;
+}
+/* DOM_HTML5Document::loadHTML(string source, int options = 0)
+ * Parses an HTML5 document from a string: the input is pushed through
+ * dom_parse_decode_encode_step() in chunks, the resulting lexbor tree is converted into a
+ * libxml2 document, and that document is handed to php_dom_finish_loading_document().
+ * Returns false when the lexbor-to-libxml2 conversion fails; throws INVALID_STATE_ERR when
+ * lexbor setup or any parse step fails (label fail_oom). */
+PHP_METHOD(DOM_HTML5Document, loadHTML)
+{
+ const char *source;
+ size_t source_len;
+ zend_long options = 0;
+ if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|l", &source, &source_len, &options) == FAILURE) {
+ RETURN_THROWS();
+ }
+
+ if (!check_options_validity(options)) {
+ RETURN_THROWS();
+ }
+ /* Set up the application data and the parse context; "Entity" is the input name that is
+  * printed when the bridge conversion fails (see the php_libxml_ctx_error() call below).
+  * The error reporters are only installed when XML_PARSE_NOERROR is not set. */
+ dom_lexbor_libxml2_bridge_application_data application_data;
+ application_data.input_name = "Entity";
+ application_data.current_total_offset = 0;
+ dom_reset_line_column_cache(&application_data.cache_tokenizer);
+ lexbor_libxml2_bridge_parse_context ctx;
+ lexbor_libxml2_bridge_parse_context_init(&ctx);
+ if (!(options & XML_PARSE_NOERROR)) {
+ lexbor_libxml2_bridge_parse_set_error_callbacks(&ctx, dom_lexbor_libxml2_bridge_tokenizer_error_reporter, dom_lexbor_libxml2_bridge_tree_error_reporter);
+ }
+ ctx.application_data = &application_data;
+
+ lxb_html_document_t *document = lxb_html_document_create();
+ if (UNEXPECTED(document == NULL)) {
+ goto fail_oom;
+ }
+
+ lxb_status_t lexbor_status = lxb_html_document_parse_chunk_begin(document);
+ if (UNEXPECTED(lexbor_status != LXB_STATUS_OK)) {
+ goto fail_oom;
+ }
+
+ /* Setup everything encoding & decoding related */
+ dom_decoding_encoding_ctx decoding_encoding_ctx;
+ dom_decoding_encoding_ctx_init(&decoding_encoding_ctx);
+
+ lxb_html_parser_t *parser = document->dom_document.parser;
+ size_t tokenizer_error_offset = 0;
+ size_t tree_error_offset = 0;
+
+ const lxb_char_t *buf_ref = (const lxb_char_t *) source;
+ dom_setup_parser_encoding(&buf_ref, &source_len, &decoding_encoding_ctx);
+ /* The application data points either at the raw source bytes (fast path) or at the
+  * decoded code points. NOTE(review): presumably "fast path" means the input needs no
+  * transcoding; confirm in dom_setup_parser_encoding(). */
+ if (decoding_encoding_ctx.fast_path) {
+ application_data.current_input_codepoints = NULL;
+ application_data.current_input_characters = source;
+ } else {
+ application_data.current_input_codepoints = decoding_encoding_ctx.codepoints;
+ application_data.current_input_characters = NULL;
+ }
+ /* Feed the input in chunks of at most as many bytes as the encoding output buffer has elements. */
+ while (source_len > 0) {
+ size_t chunk_size = source_len;
+ if (chunk_size > sizeof(decoding_encoding_ctx.encoding_output) / sizeof(lxb_char_t)) {
+ chunk_size = sizeof(decoding_encoding_ctx.encoding_output) / sizeof(lxb_char_t);
+ }
+ source_len -= chunk_size;
+
+ const lxb_char_t *buf_end = buf_ref + chunk_size;
+ bool result = dom_parse_decode_encode_step(&ctx, document, parser, &buf_ref, buf_end, &decoding_encoding_ctx, &tokenizer_error_offset, &tree_error_offset);
+ if (!result) {
+ goto fail_oom;
+ }
+ }
+
+ if (!dom_parse_decode_encode_finish(&ctx, document, parser, &decoding_encoding_ctx, &tokenizer_error_offset, &tree_error_offset)) {
+ goto fail_oom;
+ }
+
+ lexbor_status = lxb_html_document_parse_chunk_end(document);
+ if (lexbor_status != LXB_STATUS_OK) {
+ goto fail_oom;
+ }
+ /* Convert the lexbor tree into a libxml2 document. The lexbor document is destroyed on
+  * both the failure and the success path below. */
+ xmlDocPtr lxml_doc;
+ lexbor_libxml2_bridge_status bridge_status = lexbor_libxml2_bridge_convert_document(document, &lxml_doc, options & XML_PARSE_COMPACT, !(options & DOM_HTML_NO_DEFAULT_NS));
+ lexbor_libxml2_bridge_copy_observations(parser->tree, &ctx.observations);
+ if (UNEXPECTED(bridge_status != LEXBOR_LIBXML2_BRIDGE_STATUS_OK)) {
+ php_libxml_ctx_error(NULL, "%s in %s", dom_lexbor_libxml2_bridge_status_code_to_string(bridge_status), application_data.input_name);
+ lxb_html_document_destroy(document);
+ RETURN_FALSE;
+ }
+ lxb_html_document_destroy(document);
+
+ dom_post_process_html5_loading(lxml_doc, options, &ctx.observations);
+ /* Record the document encoding: the name of the decoder that was set up, or "UTF-8" when decode_data is NULL. */
+ if (decoding_encoding_ctx.decode_data) {
+ lxml_doc->encoding = xmlStrdup((const xmlChar *) decoding_encoding_ctx.decode_data->name);
+ } else {
+ lxml_doc->encoding = xmlStrdup((const xmlChar *) "UTF-8");
+ }
+
+ php_dom_finish_loading_document(ZEND_THIS, return_value, lxml_doc);
+ return;
+ /* Shared failure exit for every lexbor/parse-step failure above (not only out-of-memory,
+  * despite the label name). NOTE(review): this is reached with document == NULL when
+  * lxb_html_document_create() fails, so it relies on lxb_html_document_destroy()
+  * accepting NULL; confirm. */
+fail_oom:
+ lxb_html_document_destroy(document);
+ php_dom_throw_error(INVALID_STATE_ERR, 1);
+ RETURN_THROWS();
+}
+/* DOM_HTML5Document::loadHTMLFile(string filename, int options = 0)
+ * Parses an HTML5 document from a stream opened with php_stream_open_wrapper_ex(): the file is
+ * read in blocks of sizeof(buf) bytes, each block goes through dom_parse_decode_encode_step(),
+ * and the resulting lexbor tree is converted into a libxml2 document that is handed to
+ * php_dom_finish_loading_document().
+ * Returns false when the name contains "%00", when the stream cannot be opened, or when the
+ * lexbor-to-libxml2 conversion fails; throws INVALID_STATE_ERR when lexbor setup or any parse
+ * step fails (label fail_oom). */
+PHP_METHOD(DOM_HTML5Document, loadHTMLFile)
+{
+ const char *filename;
+ size_t filename_len;
+ zend_long options = 0;
+ php_stream *stream = NULL;
+ if (zend_parse_parameters(ZEND_NUM_ARGS(), "p|l", &filename, &filename_len, &options) == FAILURE) {
+ RETURN_THROWS();
+ }
+
+ if (!check_options_validity(options)) {
+ RETURN_THROWS();
+ }
+
+ /* See php_libxml_streams_IO_open_wrapper(), apparently this caused issues in the past. */
+ if (strstr(filename, "%00")) {
+ php_error_docref(NULL, E_WARNING, "URI must not contain percent-encoded NUL bytes");
+ RETURN_FALSE;
+ }
+ /* Set up the application data and the parse context; the file name doubles as the input
+  * name. The error reporters are only installed when XML_PARSE_NOERROR is not set. */
+ dom_lexbor_libxml2_bridge_application_data application_data;
+ application_data.input_name = filename;
+ application_data.current_total_offset = 0;
+ dom_reset_line_column_cache(&application_data.cache_tokenizer);
+ lexbor_libxml2_bridge_parse_context ctx;
+ lexbor_libxml2_bridge_parse_context_init(&ctx);
+ if (!(options & XML_PARSE_NOERROR)) {
+ lexbor_libxml2_bridge_parse_set_error_callbacks(&ctx, dom_lexbor_libxml2_bridge_tokenizer_error_reporter, dom_lexbor_libxml2_bridge_tree_error_reporter);
+ }
+ ctx.application_data = &application_data;
+
+ lxb_html_document_t *document = lxb_html_document_create();
+ if (UNEXPECTED(document == NULL)) {
+ goto fail_oom;
+ }
+
+ lxb_status_t lexbor_status = lxb_html_document_parse_chunk_begin(document);
+ if (UNEXPECTED(lexbor_status != LXB_STATUS_OK)) {
+ goto fail_oom;
+ }
+ /* The stream is opened only after the lexbor document exists; an open failure destroys the document and returns false without throwing. */
+ stream = php_stream_open_wrapper_ex(filename, "rb", REPORT_ERRORS, /* opened_path */ NULL, /* context */ NULL);
+ if (!stream) {
+ lxb_html_document_destroy(document);
+ RETURN_FALSE;
+ }
+
+ /* Setup everything encoding & decoding related */
+ bool first_read = true;
+ dom_decoding_encoding_ctx decoding_encoding_ctx;
+ dom_decoding_encoding_ctx_init(&decoding_encoding_ctx);
+
+ size_t tokenizer_error_offset = 0;
+ size_t tree_error_offset = 0;
+ ssize_t read;
+ char buf[4096];
+ lxb_html_parser_t *parser = document->dom_document.parser;
+ /* NOTE(review): a negative php_stream_read() result ends this loop exactly like end-of-file does; no read error is reported here. */
+ while ((read = php_stream_read(stream, buf, sizeof(buf))) > 0) {
+ const lxb_char_t *buf_ref = (const lxb_char_t *) buf;
+
+ /* First read => determine encoding */
+ if (first_read) {
+ first_read = false;
+ dom_setup_parser_encoding(&buf_ref, (size_t *) &read, &decoding_encoding_ctx);
+ if (decoding_encoding_ctx.fast_path) {
+ application_data.current_input_codepoints = NULL;
+ application_data.current_input_characters = buf;
+ } else {
+ application_data.current_input_codepoints = decoding_encoding_ctx.codepoints;
+ application_data.current_input_characters = NULL;
+ }
+ }
+ /* buf_ref and read are passed by pointer above, so the encoding setup may have adjusted both before the end pointer is computed. */
+ const lxb_char_t *buf_end = buf_ref + read;
+ bool result = dom_parse_decode_encode_step(&ctx, document, parser, &buf_ref, buf_end, &decoding_encoding_ctx, &tokenizer_error_offset, &tree_error_offset);
+ if (!result) {
+ goto fail_oom;
+ }
+ }
+ /* Close the stream now and clear the pointer so that fail_oom does not close it a second time. */
+ php_stream_close(stream);
+ stream = NULL;
+
+ if (!dom_parse_decode_encode_finish(&ctx, document, parser, &decoding_encoding_ctx, &tokenizer_error_offset, &tree_error_offset)) {
+ goto fail_oom;
+ }
+
+ lexbor_status = lxb_html_document_parse_chunk_end(document);
+ if (lexbor_status != LXB_STATUS_OK) {
+ goto fail_oom;
+ }
+ /* Convert the lexbor tree into a libxml2 document. The lexbor document is destroyed on
+  * both the failure and the success path below. */
+ xmlDocPtr lxml_doc;
+ lexbor_libxml2_bridge_status bridge_status = lexbor_libxml2_bridge_convert_document(document, &lxml_doc, options & XML_PARSE_COMPACT, !(options & DOM_HTML_NO_DEFAULT_NS));
+ lexbor_libxml2_bridge_copy_observations(parser->tree, &ctx.observations);
+ if (UNEXPECTED(bridge_status != LEXBOR_LIBXML2_BRIDGE_STATUS_OK)) {
+ php_libxml_ctx_error(NULL, "%s in %s", dom_lexbor_libxml2_bridge_status_code_to_string(bridge_status), filename);
+ lxb_html_document_destroy(document);
+ RETURN_FALSE;
+ }
+ lxb_html_document_destroy(document);
+
+ dom_post_process_html5_loading(lxml_doc, options, &ctx.observations);
+ /* Record the document encoding: the name of the decoder that was set up, or "UTF-8" when decode_data is NULL. */
+ if (decoding_encoding_ctx.decode_data) {
+ lxml_doc->encoding = xmlStrdup((const xmlChar *) decoding_encoding_ctx.decode_data->name);
+ } else {
+ lxml_doc->encoding = xmlStrdup((const xmlChar *) "UTF-8");
+ }
+
+ php_dom_finish_loading_document(ZEND_THIS, return_value, lxml_doc);
+ return;
+ /* Shared failure exit for every lexbor/parse-step failure above (not only out-of-memory,
+  * despite the label name); also closes the stream when it is still open.
+  * NOTE(review): reached with document == NULL when lxb_html_document_create() fails, so it
+  * relies on lxb_html_document_destroy() accepting NULL; confirm. */
+fail_oom:
+ php_dom_throw_error(INVALID_STATE_ERR, 1);
+ lxb_html_document_destroy(document);
+ if (stream) {
+ php_stream_close(stream);
+ }
+ RETURN_THROWS();
+}
+
+/* The living spec never creates explicit namespace declaration nodes: they are only written
+ * upon serialization and never appear in the tree, so in principle they could be ignored.
+ * However, step 10 in https://html.spec.whatwg.org/multipage/parsing.html#create-an-element-for-the-token
+ * requires the declaration to be available as an attribute.
+ *
+ * Walks every node below doc in document order, without recursion, and passes the nsDef list
+ * of each element to dom_ns_compat_mark_attribute_list(). A NULL doc is a no-op. */
+static void dom_mark_namespaces_as_attributes_too(xmlDocPtr doc)
+{
+	if (doc == NULL) {
+		return;
+	}
+
+	xmlNodePtr cursor = doc->children;
+	while (cursor != NULL) {
+		if (cursor->type == XML_ELEMENT_NODE) {
+			dom_ns_compat_mark_attribute_list(cursor->nsDef);
+
+			/* Descend first; siblings are handled on the way back up. */
+			if (cursor->children != NULL) {
+				cursor = cursor->children;
+				continue;
+			}
+		}
+
+		/* No (more) children: climb until a node with a next sibling is found, or stop at the top. */
+		while (cursor->next == NULL) {
+			cursor = cursor->parent;
+			if (cursor == NULL) {
+				return;
+			}
+		}
+		cursor = cursor->next;
+	}
+}
+
+/* Walks the tree rooted at copy and the tree rooted at original in lockstep (same moves on
+ * both sides) and, for every element in the copy, calls
+ * dom_ns_compat_copy_attribute_list_mark() with the copy's and the original's nsDef lists.
+ * The two trees are expected to have the same shape; this is only checked by an assertion. */
+void dom_mark_namespaces_for_copy_based_on_copy(xmlNodePtr copy, const xmlNode *original)
+{
+	xmlNodePtr dst = copy;
+	const xmlNode *src = original;
+
+	while (dst != NULL) {
+		ZEND_ASSERT(src != NULL);
+
+		if (dst->type == XML_ELEMENT_NODE) {
+			dom_ns_compat_copy_attribute_list_mark(dst->nsDef, src->nsDef);
+
+			/* Descend first; siblings are handled on the way back up. */
+			if (dst->children != NULL) {
+				dst = dst->children;
+				src = src->children;
+				continue;
+			}
+		}
+
+		/* Climb on both sides until the copy has a next sibling, or stop once the copy runs out of parents. */
+		while (dst->next == NULL) {
+			dst = dst->parent;
+			if (dst == NULL) {
+				return;
+			}
+			src = src->parent;
+		}
+		dst = dst->next;
+		src = src->next;
+	}
+}
+
+/* write_output callback that appends size bytes from buf to the smart_str passed as ctx.
+ * Always returns SUCCESS. */
+static zend_result dom_write_output_smart_str(void *ctx, const char *buf, size_t size)
+{
+	smart_str *destination = (smart_str *) ctx;
+
+	smart_str_appendl(destination, buf, size);
+	return SUCCESS;
+}
+
+/* write_output callback that writes len bytes from buf to the php_stream passed as
+ * application_data. Returns FAILURE only when php_stream_write() reports a negative result. */
+static zend_result dom_write_output_stream(void *application_data, const char *buf, size_t len)
+{
+	php_stream *sink = (php_stream *) application_data;
+
+	return UNEXPECTED(php_stream_write(sink, buf, len) < 0) ? FAILURE : SUCCESS;
+}
+
+/* Serializer callback: transcodes len bytes of buf and forwards the result to the
+ * write_output callback of the dom_output_ctx passed as application_data.
+ * The input is decoded into output->codepoints and then encoded into output->encoding_output;
+ * both stages are repeated for as long as lexbor reports LXB_STATUS_SMALL_BUFFER, and both
+ * buffers are reset to empty after each use. Returns FAILURE as soon as a write fails. */
+static zend_result dom_saveHTML_write_string_len(void *application_data, const char *buf, size_t len)
+{
+	dom_output_ctx *output = (dom_output_ctx *) application_data;
+	const lxb_char_t *input_cursor = (const lxb_char_t *) buf;
+	const lxb_char_t *input_limit = input_cursor + len;
+
+	for (;;) {
+		lxb_status_t decode_status = output->decoding_data->decode(output->decode, &input_cursor, input_limit);
+
+		const lxb_codepoint_t *cp_cursor = output->codepoints;
+		const lxb_codepoint_t *cp_limit = cp_cursor + lxb_encoding_decode_buf_used(output->decode);
+
+		for (;;) {
+			lxb_status_t encode_status = output->encoding_data->encode(output->encode, &cp_cursor, cp_limit);
+
+			/* Flush whatever was encoded in this round, then reuse the output buffer from the start. */
+			if (UNEXPECTED(output->write_output(output->output_data, (const char *) output->encoding_output, lxb_encoding_encode_buf_used(output->encode)) != SUCCESS)) {
+				return FAILURE;
+			}
+			lxb_encoding_encode_buf_used_set(output->encode, 0);
+
+			if (encode_status != LXB_STATUS_SMALL_BUFFER) {
+				break;
+			}
+		}
+
+		lxb_encoding_decode_buf_used_set(output->decode, 0);
+
+		if (decode_status != LXB_STATUS_SMALL_BUFFER) {
+			break;
+		}
+	}
+
+	return SUCCESS;
+}
+
+/* Serializer callback for NUL-terminated strings: measures buf with strlen() and defers to
+ * dom_saveHTML_write_string_len(). */
+static zend_result dom_saveHTML_write_string(void *application_data, const char *buf)
+{
+	const size_t buf_len = strlen(buf);
+
+	return dom_saveHTML_write_string_len(application_data, buf, buf_len);
+}
+
+/* Serializes node as HTML5 and sends the result, transcoded from UTF-8 to the document's
+ * encoding, to output_ctx->write_output.
+ *
+ * output_ctx: the caller fills in write_output and output_data; this function fills in the
+ *             remaining fields. Those fields point at buffers local to this function, so
+ *             they must not be used after it returns.
+ * docp:       supplies the target encoding name; when it is NULL or unknown to lexbor,
+ *             DOM_FALLBACK_ENCODING_ID is used instead.
+ * node:       the node to serialize.
+ *
+ * Returns FAILURE when serialization or any write fails, SUCCESS otherwise. */
+static zend_result dom_common_save(dom_output_ctx *output_ctx, const xmlDoc *docp, const xmlNode *node)
+{
+	/* Initialize everything related to encoding & decoding */
+	const lxb_encoding_data_t *decoding_data = lxb_encoding_data(LXB_ENCODING_UTF_8);
+	const lxb_encoding_data_t *encoding_data = NULL;
+	if (docp->encoding != NULL) {
+		encoding_data = lxb_encoding_data_by_name((const lxb_char_t *) docp->encoding, strlen((const char *) docp->encoding));
+	}
+	if (encoding_data == NULL) {
+		encoding_data = lxb_encoding_data(DOM_FALLBACK_ENCODING_ID);
+		ZEND_ASSERT(encoding_data != NULL);
+	}
+	lxb_encoding_encode_t encode;
+	lxb_encoding_decode_t decode;
+	lxb_char_t encoding_output[4096];
+	lxb_codepoint_t codepoints[4096];
+	(void) lxb_encoding_encode_init(&encode, encoding_data, encoding_output, sizeof(encoding_output) / sizeof(lxb_char_t));
+	(void) lxb_encoding_decode_init(&decode, decoding_data, codepoints, sizeof(codepoints) / sizeof(lxb_codepoint_t));
+	if (encoding_data->encoding == LXB_ENCODING_UTF_8) {
+		lxb_encoding_encode_replace_set(&encode, LXB_ENCODING_REPLACEMENT_BYTES, LXB_ENCODING_REPLACEMENT_SIZE);
+	} else {
+		/* Fallback if there is no replacement by default */
+		lxb_encoding_encode_replace_set(&encode, (const lxb_char_t *) "?", 1);
+	}
+	lxb_encoding_decode_replace_set(&decode, LXB_ENCODING_REPLACEMENT_BUFFER, LXB_ENCODING_REPLACEMENT_BUFFER_LEN);
+
+	output_ctx->encoding_data = encoding_data;
+	output_ctx->decoding_data = decoding_data;
+	output_ctx->encode = &encode;
+	output_ctx->decode = &decode;
+	output_ctx->codepoints = codepoints;
+	output_ctx->encoding_output = encoding_output;
+
+	dom_html5_serialize_context ctx;
+	ctx.write_string_len = dom_saveHTML_write_string_len;
+	ctx.write_string = dom_saveHTML_write_string;
+	ctx.application_data = output_ctx;
+	if (UNEXPECTED(dom_html5_serialize(&ctx, node) != SUCCESS)) {
+		return FAILURE;
+	}
+
+	/* Flush code points the decoder only produces when it is told the input has ended. */
+	(void) lxb_encoding_decode_finish(&decode);
+	if (lxb_encoding_decode_buf_used(&decode)) {
+		const lxb_codepoint_t *codepoints_ref = (const lxb_codepoint_t *) codepoints;
+		(void) encoding_data->encode(&encode, &codepoints_ref, codepoints_ref + lxb_encoding_decode_buf_used(&decode));
+		if (UNEXPECTED(output_ctx->write_output(output_ctx->output_data, (const char *) encoding_output, lxb_encoding_encode_buf_used(&encode)) != SUCCESS)) {
+			return FAILURE;
+		}
+		/* These bytes are written; mark the buffer empty so the flush below does not emit them a second time. */
+		lxb_encoding_encode_buf_used_set(&encode, 0);
+	}
+
+	/* Flush bytes the encoder only produces when it is told the output has ended. */
+	(void) lxb_encoding_encode_finish(&encode);
+	if (lxb_encoding_encode_buf_used(&encode)) {
+		if (UNEXPECTED(output_ctx->write_output(output_ctx->output_data, (const char *) encoding_output, lxb_encoding_encode_buf_used(&encode)) != SUCCESS)) {
+			return FAILURE;
+		}
+	}
+
+	return SUCCESS;
+}
+
+/* DOM_HTML5Document::saveHTMLFile(string file)
+ * Serializes the whole document as HTML5 into a stream opened for writing.
+ * Returns the stream position after writing, or false when the stream cannot be opened or
+ * when serialization/writing fails. Throws when file is empty. */
+PHP_METHOD(DOM_HTML5Document, saveHTMLFile)
+{
+	zval *id;
+	xmlDoc *docp;
+	size_t file_len;
+	dom_object *intern;
+	char *file;
+
+	id = ZEND_THIS;
+	if (zend_parse_parameters(ZEND_NUM_ARGS(), "p", &file, &file_len) == FAILURE) {
+		RETURN_THROWS();
+	}
+
+	if (file_len == 0) {
+		zend_argument_value_error(1, "must not be empty");
+		RETURN_THROWS();
+	}
+
+	/* Fetch the document before touching the file system: DOM_GET_OBJ() returns from this
+	 * function when the node cannot be fetched, which would otherwise leak an already opened
+	 * stream and leave a truncated file behind. */
+	DOM_GET_OBJ(docp, id, xmlDocPtr, intern);
+
+	php_stream *stream = php_stream_open_wrapper_ex(file, "wb", REPORT_ERRORS, /* opened_path */ NULL, /* context */ NULL);
+	if (!stream) {
+		RETURN_FALSE;
+	}
+
+	dom_output_ctx output_ctx;
+	output_ctx.output_data = stream;
+	output_ctx.write_output = dom_write_output_stream;
+	if (UNEXPECTED(dom_common_save(&output_ctx, docp, (const xmlNode *) docp) != SUCCESS)) {
+		php_stream_close(stream);
+		RETURN_FALSE;
+	}
+
+	/* The stream position after writing is reported as the number of bytes written. */
+	zend_long bytes = php_stream_tell(stream);
+	php_stream_close(stream);
+
+	RETURN_LONG(bytes);
+}
+
+/* DOM_HTML5Document::saveHTML(?DOMNode node = null)
+ * Serializes the given node, or the whole document when no node is passed, as HTML5 and
+ * returns it as a string. A node that belongs to another document raises
+ * WRONG_DOCUMENT_ERR and yields false. */
+PHP_METHOD(DOM_HTML5Document, saveHTML)
+{
+	zval *nodep = NULL;
+	const xmlDoc *docp;
+	const xmlNode *node;
+	dom_object *intern, *nodeobj;
+
+	if (zend_parse_parameters(ZEND_NUM_ARGS(), "|O!", &nodep, dom_node_class_entry) == FAILURE) {
+		RETURN_THROWS();
+	}
+
+	DOM_GET_OBJ(docp, ZEND_THIS, xmlDocPtr, intern);
+
+	if (nodep == NULL) {
+		/* No node given: serialize the document itself. */
+		node = (const xmlNode *) docp;
+	} else {
+		DOM_GET_OBJ(node, nodep, xmlNodePtr, nodeobj);
+		if (node->doc != docp) {
+			php_dom_throw_error(WRONG_DOCUMENT_ERR, dom_get_strict_error(intern->document));
+			RETURN_FALSE;
+		}
+	}
+
+	smart_str serialized = {0};
+	dom_output_ctx output_ctx;
+	output_ctx.write_output = dom_write_output_smart_str;
+	output_ctx.output_data = &serialized;
+
+	/* Can't fail because dom_write_output_smart_str() can't fail. */
+	zend_result result = dom_common_save(&output_ctx, docp, node);
+	ZEND_ASSERT(result == SUCCESS);
+
+	RETURN_STR(smart_str_extract(&serialized));
+}
+
+/* DOM_HTML5Document::__construct(): runs the shared document constructor and then flags the
+ * document reference as belonging to the HTML5 class. */
+PHP_METHOD(DOM_HTML5Document, __construct)
+{
+	php_dom_document_constructor(INTERNAL_FUNCTION_PARAM_PASSTHRU);
+
+	dom_object *intern = Z_DOMOBJ_P(ZEND_THIS);
+	intern->document->is_html5_class = true;
+}
+
+/* DOM_HTML5Document::load(): parses via dom_parse_document() in DOM_LOAD_FILE mode, then marks
+ * the namespace declarations of the parsed document as attributes. */
+PHP_METHOD(DOM_HTML5Document, load)
+{
+	xmlDocPtr parsed_doc = NULL;
+
+	dom_parse_document(INTERNAL_FUNCTION_PARAM_PASSTHRU, DOM_LOAD_FILE, &parsed_doc);
+	/* parsed_doc is still NULL if the parser did not set it; the marking helper tolerates that. */
+	dom_mark_namespaces_as_attributes_too(parsed_doc);
+}
+
+/* DOM_HTML5Document::loadXML(): parses via dom_parse_document() in DOM_LOAD_STRING mode, then
+ * marks the namespace declarations of the parsed document as attributes. */
+PHP_METHOD(DOM_HTML5Document, loadXML)
+{
+	xmlDocPtr parsed_doc = NULL;
+
+	dom_parse_document(INTERNAL_FUNCTION_PARAM_PASSTHRU, DOM_LOAD_STRING, &parsed_doc);
+	/* parsed_doc is still NULL if the parser did not set it; the marking helper tolerates that. */
+	dom_mark_namespaces_as_attributes_too(parsed_doc);
+}
+
+/* Property write handler for the document encoding.
+ * Looks the new value up with lxb_encoding_data_by_name() and stores lexbor's name for that
+ * encoding on the libxml2 document. Returns FAILURE with INVALID_STATE_ERR when the object
+ * has no document node, and FAILURE with a ValueError ("Invalid document encoding") when the
+ * value is null or not a known encoding name. */
+zend_result dom_html5_document_encoding_write(dom_object *obj, zval *newval)
+{
+	xmlDoc *docp = (xmlDocPtr) dom_object_get_node(obj);
+	if (docp == NULL) {
+		php_dom_throw_error(INVALID_STATE_ERR, 1);
+		return FAILURE;
+	}
+
+	/* Typed property, can only be IS_STRING or IS_NULL. */
+	ZEND_ASSERT(Z_TYPE_P(newval) == IS_STRING || Z_TYPE_P(newval) == IS_NULL);
+
+	const lxb_encoding_data_t *encoding_data = NULL;
+	if (Z_TYPE_P(newval) != IS_NULL) {
+		zend_string *requested = Z_STR_P(newval);
+		encoding_data = lxb_encoding_data_by_name((const lxb_char_t *) ZSTR_VAL(requested), ZSTR_LEN(requested));
+	}
+
+	if (encoding_data == NULL) {
+		zend_value_error("Invalid document encoding");
+		return FAILURE;
+	}
+
+	/* Replace the stored name with lexbor's name for the encoding. */
+	xmlFree((xmlChar *) docp->encoding);
+	docp->encoding = xmlStrdup((const xmlChar *) encoding_data->name);
+	return SUCCESS;
+}
+
+#endif /* HAVE_LIBXML && HAVE_DOM */
diff --git a/ext/dom/html5_parser.c b/ext/dom/html5_parser.c
new file mode 100644
index 0000000000000..bddccd17b153b
--- /dev/null
+++ b/ext/dom/html5_parser.c
@@ -0,0 +1,262 @@
+/*
+ +----------------------------------------------------------------------+
+ | Copyright (c) The PHP Group |
+ +----------------------------------------------------------------------+
+ | This source file is subject to version 3.01 of the PHP license, |
+ | that is bundled with this package in the file LICENSE, and is |
+ | available through the world-wide-web at the following url: |
+ | https://www.php.net/license/3_01.txt |
+ | If you did not receive a copy of the PHP license and are unable to |
+ | obtain it through the world-wide-web, please send a note to |
+ | license@php.net so we can mail you a copy immediately. |
+ +----------------------------------------------------------------------+
+ | Authors: Niels Dossche |
+ +----------------------------------------------------------------------+
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "php.h"
+#if defined(HAVE_LIBXML) && defined(HAVE_DOM)
+#include "html5_parser.h"
+#include "namespace_compat.h"
+#include
+#include
+#include
+#include
+#include
+#include
+/* One pending job on the work list of lexbor_libxml2_bridge_convert(). */
+typedef struct {
+ lxb_dom_node_t *node; /* lexbor node that still has to be converted */
+ uintptr_t current_active_namespace; /* lexbor namespace id of the element that pushed this item (LXB_NS__UNDEF for top-level nodes) */
+ xmlNodePtr lxml_parent; /* libxml2 node the converted node gets attached to */
+ xmlNsPtr lxml_ns; /* libxml2 namespace in effect at the parent, NULL for top-level nodes */
+} work_list_item;
+
+/* Appends a new work item to array and fills in all four of its fields.
+ * NOTE(review): the result of lexbor_array_obj_push_wo_cls() is used without a NULL check. */
+static void lexbor_libxml2_bridge_work_list_item_push(lexbor_array_obj_t *array, lxb_dom_node_t *node, uintptr_t current_active_namespace, xmlNodePtr lxml_parent, xmlNsPtr lxml_ns)
+{
+	work_list_item *slot = (work_list_item *) lexbor_array_obj_push_wo_cls(array);
+
+	*slot = (work_list_item) {
+		.node = node,
+		.current_active_namespace = current_active_namespace,
+		.lxml_parent = lxml_parent,
+		.lxml_ns = lxml_ns,
+	};
+}
+
+/* Clamps a line number to what fits in an unsigned short: values above USHRT_MAX become USHRT_MAX. */
+static unsigned short sanitize_line_nr(size_t line)
+{
+	return line > USHRT_MAX ? USHRT_MAX : (unsigned short) line;
+}
+
+/* Maps a lexbor namespace id to a namespace URI: SVG and MathML get their own URI, every
+ * other id maps to the XHTML URI. */
+static const xmlChar *get_libxml_namespace_href(uintptr_t lexbor_namespace)
+{
+	switch (lexbor_namespace) {
+		case LXB_NS_SVG:
+			return (const xmlChar *) DOM_SVG_NS_URI;
+		case LXB_NS_MATH:
+			return (const xmlChar *) DOM_MATHML_NS_URI;
+		default:
+			return (const xmlChar *) DOM_XHTML_NS_URI;
+	}
+}
+
+/* Converts start_node, all of its previous siblings and everything below them into libxml2
+ * nodes attached to lxml_doc.
+ *
+ * The conversion is iterative: pending nodes sit on a work list that is used as a stack.
+ * Siblings are pushed from last to first so that they are popped, and therefore attached, in
+ * document order.
+ *
+ * compact_text_nodes: store short text inside the node itself (see xmlSAX2TextNode() in libxml2).
+ * create_default_ns:  declare a default namespace on an element whenever its lexbor namespace
+ *                     differs from the one in effect at its parent.
+ *
+ * Returns OK, OOM when a libxml2 allocation fails, or OVERFLOW when a text node is too long.
+ * On failure the nodes created so far stay attached to lxml_doc.
+ * NOTE(review): the results of lexbor_array_obj_init() and xmlNewNs() are not checked. */
+static lexbor_libxml2_bridge_status lexbor_libxml2_bridge_convert(lxb_dom_node_t *start_node, xmlDocPtr lxml_doc, bool compact_text_nodes, bool create_default_ns)
+{
+	lexbor_libxml2_bridge_status retval = LEXBOR_LIBXML2_BRIDGE_STATUS_OK;
+
+	lexbor_array_obj_t work_list;
+	lexbor_array_obj_init(&work_list, 128, sizeof(work_list_item));
+
+	for (lxb_dom_node_t *node = start_node; node != NULL; node = node->prev) {
+		lexbor_libxml2_bridge_work_list_item_push(&work_list, node, LXB_NS__UNDEF, (xmlNodePtr) lxml_doc, NULL);
+	}
+
+	work_list_item *current_stack_item;
+	while ((current_stack_item = lexbor_array_obj_pop(&work_list)) != NULL) {
+		lxb_dom_node_t *node = current_stack_item->node;
+		xmlNodePtr lxml_parent = current_stack_item->lxml_parent;
+
+		/* CDATA section and processing instructions don't occur in parsed HTML documents.
+		 * The historical types are not emitted by the parser either. */
+		if (node->type == LXB_DOM_NODE_TYPE_ELEMENT) {
+			/* Note: HTML isn't exactly XML-namespace-aware; as this is an HTML parser we only care about the local name.
+			 * If a prefix:name format is used, then the local name will be "prefix:name" and the prefix will be empty.
+			 * There is however still somewhat of a concept of namespaces. There are three: HTML (the default), SVG, and MATHML. */
+			lxb_dom_element_t *element = lxb_dom_interface_element(node);
+			const lxb_char_t *name = lxb_dom_element_local_name(element, NULL);
+			xmlNodePtr lxml_element = xmlNewDocNode(lxml_doc, NULL, name, NULL);
+			if (UNEXPECTED(lxml_element == NULL)) {
+				retval = LEXBOR_LIBXML2_BRIDGE_STATUS_OOM;
+				goto out;
+			}
+			xmlAddChild(lxml_parent, lxml_element);
+			lxml_element->line = sanitize_line_nr(node->line);
+
+			/* Namespaces, note: namespace switches are uncommon */
+			uintptr_t entering_namespace = element->node.ns;
+			xmlNsPtr current_lxml_ns = current_stack_item->lxml_ns;
+			if (create_default_ns && UNEXPECTED(entering_namespace != current_stack_item->current_active_namespace)) {
+				current_lxml_ns = xmlNewNs(lxml_element, get_libxml_namespace_href(entering_namespace), NULL);
+			}
+			lxml_element->ns = current_lxml_ns; /* Instead of xmlSetNs() because we know the arguments are valid. Prevents overhead. */
+
+			for (lxb_dom_node_t *child_node = element->node.last_child; child_node != NULL; child_node = child_node->prev) {
+				lexbor_libxml2_bridge_work_list_item_push(&work_list, child_node, entering_namespace, lxml_element, current_lxml_ns);
+			}
+
+			/* Attributes are pushed after the children, so they are popped before them, first attribute first. */
+			for (lxb_dom_attr_t *attr = element->last_attr; attr != NULL; attr = attr->prev) {
+				lexbor_libxml2_bridge_work_list_item_push(&work_list, (lxb_dom_node_t *) attr, entering_namespace, lxml_element, current_lxml_ns);
+			}
+		} else if (node->type == LXB_DOM_NODE_TYPE_TEXT) {
+			lxb_dom_text_t *text = lxb_dom_interface_text(node);
+			const lxb_char_t *data = text->char_data.data.data;
+			size_t data_length = text->char_data.data.length;
+			if (UNEXPECTED(data_length >= INT_MAX)) {
+				retval = LEXBOR_LIBXML2_BRIDGE_STATUS_OVERFLOW;
+				goto out;
+			}
+			xmlNodePtr lxml_text;
+			if (compact_text_nodes && data_length < sizeof(void *) * 2) {
+				/* See xmlSAX2TextNode() in libxml2 */
+				lxml_text = xmlMalloc(sizeof(xmlNode));
+				if (UNEXPECTED(lxml_text == NULL)) {
+					retval = LEXBOR_LIBXML2_BRIDGE_STATUS_OOM;
+					goto out;
+				}
+				memset(lxml_text, 0, sizeof(xmlNode));
+				lxml_text->name = xmlStringText;
+				lxml_text->type = XML_TEXT_NODE;
+				lxml_text->doc = lxml_doc;
+				/* The text lives inside the node itself, starting at the properties field. */
+				lxml_text->content = (xmlChar *) &lxml_text->properties;
+				memcpy(lxml_text->content, data, data_length + 1 /* include '\0' */);
+			} else {
+				lxml_text = xmlNewDocTextLen(lxml_doc, (const xmlChar *) data, data_length);
+				if (UNEXPECTED(lxml_text == NULL)) {
+					retval = LEXBOR_LIBXML2_BRIDGE_STATUS_OOM;
+					goto out;
+				}
+			}
+			xmlAddChild(lxml_parent, lxml_text);
+			if (node->line >= USHRT_MAX) {
+				/* Line numbers that don't fit are stored in full in psvi. */
+				lxml_text->line = USHRT_MAX;
+				lxml_text->psvi = (void *) (ptrdiff_t) node->line;
+			} else {
+				lxml_text->line = (unsigned short) node->line;
+			}
+		} else if (node->type == LXB_DOM_NODE_TYPE_DOCUMENT_TYPE) {
+			lxb_dom_document_type_t *doctype = lxb_dom_interface_document_type(node);
+			const lxb_char_t *name = lxb_dom_document_type_name(doctype, NULL);
+			size_t public_id_len, system_id_len;
+			const lxb_char_t *public_id = lxb_dom_document_type_public_id(doctype, &public_id_len);
+			const lxb_char_t *system_id = lxb_dom_document_type_system_id(doctype, &system_id_len);
+			xmlDtdPtr lxml_dtd = xmlCreateIntSubset(lxml_doc, name, public_id_len ? public_id : NULL, system_id_len ? system_id : NULL);
+			if (UNEXPECTED(lxml_dtd == NULL)) {
+				retval = LEXBOR_LIBXML2_BRIDGE_STATUS_OOM;
+				goto out;
+			}
+			/* libxml2 doesn't support line numbers on this anyway, it returns -1 instead, so don't bother */
+		} else if (node->type == LXB_DOM_NODE_TYPE_ATTRIBUTE) {
+			/* Every attribute of an element has its own work list item (see the element branch),
+			 * so only this one is converted here. Following attr->next as well would convert each
+			 * attribute again for every attribute that precedes it. */
+			lxb_dom_attr_t *attr = lxb_dom_interface_attr(node);
+			/* Same namespace remark as for elements */
+			const lxb_char_t *local_name = lxb_dom_attr_local_name(attr, NULL);
+			const lxb_char_t *value = lxb_dom_attr_value(attr, NULL);
+			xmlAttrPtr lxml_attr = xmlSetNsProp(lxml_parent, NULL, local_name, value);
+			if (UNEXPECTED(lxml_attr == NULL)) {
+				retval = LEXBOR_LIBXML2_BRIDGE_STATUS_OOM;
+				goto out;
+			}
+			/* libxml2 doesn't support line numbers on this anyway, it derives them instead, so don't bother */
+		} else if (node->type == LXB_DOM_NODE_TYPE_COMMENT) {
+			lxb_dom_comment_t *comment = lxb_dom_interface_comment(node);
+			xmlNodePtr lxml_comment = xmlNewDocComment(lxml_doc, comment->char_data.data.data);
+			if (UNEXPECTED(lxml_comment == NULL)) {
+				retval = LEXBOR_LIBXML2_BRIDGE_STATUS_OOM;
+				goto out;
+			}
+			xmlAddChild(lxml_parent, lxml_comment);
+			lxml_comment->line = sanitize_line_nr(node->line);
+		}
+	}
+
+out:
+	lexbor_array_obj_destroy(&work_list, false);
+	return retval;
+}
+
+void lexbor_libxml2_bridge_parse_context_init(lexbor_libxml2_bridge_parse_context *ctx)
+{
+ memset(ctx, 0, sizeof(*ctx));
+}
+
+void lexbor_libxml2_bridge_parse_set_error_callbacks(lexbor_libxml2_bridge_parse_context *ctx, lexbor_libxml2_bridge_tokenizer_error_reporter tokenizer_error_reporter, lexbor_libxml2_bridge_tree_error_reporter tree_error_reporter)
+{
+ ctx->tokenizer_error_reporter = tokenizer_error_reporter;
+ ctx->tree_error_reporter = tree_error_reporter;
+}
+
+/* Creates a new libxml2 document and fills it with the converted contents of a parsed lexbor
+ * document.
+ *
+ * On success the new document is stored in *doc_out and OK is returned. On failure the
+ * partially built document is freed, *doc_out is left untouched and the failing status is
+ * returned (OOM when the document itself cannot be allocated). */
+lexbor_libxml2_bridge_status lexbor_libxml2_bridge_convert_document(lxb_html_document_t *document, xmlDocPtr *doc_out, bool compact_text_nodes, bool create_default_ns)
+{
+#ifdef LIBXML_HTML_ENABLED
+	xmlDocPtr lxml_doc = htmlNewDocNoDtD(NULL, NULL);
+#else
+	xmlDocPtr lxml_doc = xmlNewDoc((const xmlChar *) "1.0");
+	/* Only touch the document when the allocation succeeded. */
+	if (lxml_doc) {
+		lxml_doc->type = XML_HTML_DOCUMENT_NODE;
+	}
+#endif
+	if (!lxml_doc) {
+		return LEXBOR_LIBXML2_BRIDGE_STATUS_OOM;
+	}
+	/* Conversion starts at the last top-level node and walks backwards through its siblings. */
+	lexbor_libxml2_bridge_status status = lexbor_libxml2_bridge_convert(lxb_dom_interface_node(document)->last_child, lxml_doc, compact_text_nodes, create_default_ns);
+	if (status != LEXBOR_LIBXML2_BRIDGE_STATUS_OK) {
+		xmlFreeDoc(lxml_doc);
+		return status;
+	}
+	*doc_out = lxml_doc;
+	return LEXBOR_LIBXML2_BRIDGE_STATUS_OK;
+}
+
+/* Forwards the tokenizer and tree errors that lexbor has collected since the previous call to
+ * the reporters stored in ctx.
+ *
+ * *error_index_offset_tokenizer and *error_index_offset_tree hold the index of the first error
+ * not yet seen and are advanced past the last one, whether or not a reporter is installed.
+ * Tokenizer errors are reported with the offset of their position relative to input_html plus
+ * chunk_offset; tree errors are reported with 1-based line and column. */
+void lexbor_libxml2_bridge_report_errors(const lexbor_libxml2_bridge_parse_context *ctx, lxb_html_parser_t *parser, const lxb_char_t *input_html, size_t chunk_offset, size_t *error_index_offset_tokenizer, size_t *error_index_offset_tree)
+{
+	void *entry;
+	size_t cursor;
+
+	/* Tokenizer errors */
+	lexbor_array_obj_t *tokenizer_errors = lxb_html_parser_tokenizer(parser)->parse_errors;
+	for (cursor = *error_index_offset_tokenizer; (entry = lexbor_array_obj_get(tokenizer_errors, cursor)) != NULL; cursor++) {
+		/* See https://github.com/lexbor/lexbor/blob/master/source/lexbor/html/tokenizer/error.h */
+		lxb_html_tokenizer_error_t *token_error = entry;
+		if (ctx->tokenizer_error_reporter) {
+			ctx->tokenizer_error_reporter(ctx->application_data, token_error, token_error->pos - input_html + chunk_offset);
+		}
+	}
+	*error_index_offset_tokenizer = cursor;
+
+	/* Tree parser errors */
+	lexbor_array_obj_t *tree_errors = lxb_html_parser_tree(parser)->parse_errors;
+	for (cursor = *error_index_offset_tree; (entry = lexbor_array_obj_get(tree_errors, cursor)) != NULL; cursor++) {
+		/* See https://github.com/lexbor/lexbor/blob/master/source/lexbor/html/tree/error.h */
+		lxb_html_tree_error_t *tree_error = entry;
+		if (ctx->tree_error_reporter) {
+			ctx->tree_error_reporter(ctx->application_data, tree_error, tree_error->line + 1, tree_error->column + 1, tree_error->length);
+		}
+	}
+	*error_index_offset_tree = cursor;
+}
+
+/* Copies the three "explicit tag seen" flags from the lexbor tree into observations. */
+void lexbor_libxml2_bridge_copy_observations(lxb_html_tree_t *tree, lexbor_libxml2_bridge_extracted_observations *observations)
+{
+	const bool saw_html = tree->has_explicit_html_tag;
+	const bool saw_head = tree->has_explicit_head_tag;
+	const bool saw_body = tree->has_explicit_body_tag;
+
+	observations->has_explicit_html_tag = saw_html;
+	observations->has_explicit_head_tag = saw_head;
+	observations->has_explicit_body_tag = saw_body;
+}
+
+#endif /* HAVE_LIBXML && HAVE_DOM */
diff --git a/ext/dom/html5_parser.h b/ext/dom/html5_parser.h
new file mode 100644
index 0000000000000..e0e5b7b55cbf6
--- /dev/null
+++ b/ext/dom/html5_parser.h
@@ -0,0 +1,57 @@
+/*
+ +----------------------------------------------------------------------+
+ | Copyright (c) The PHP Group |
+ +----------------------------------------------------------------------+
+ | This source file is subject to version 3.01 of the PHP license, |
+ | that is bundled with this package in the file LICENSE, and is |
+ | available through the world-wide-web at the following url: |
+ | https://www.php.net/license/3_01.txt |
+ | If you did not receive a copy of the PHP license and are unable to |
+ | obtain it through the world-wide-web, please send a note to |
+ | license@php.net so we can mail you a copy immediately. |
+ +----------------------------------------------------------------------+
+ | Authors: Niels Dossche |
+ +----------------------------------------------------------------------+
+*/
+
+#ifndef CONVERT_H
+#define CONVERT_H
+
+#include
+#include
+#include
+/* Result codes of the lexbor to libxml2 bridge. */
+typedef enum {
+ LEXBOR_LIBXML2_BRIDGE_STATUS_OK = 0, /* conversion succeeded */
+ LEXBOR_LIBXML2_BRIDGE_STATUS_CANNOT_INIT, /* NOTE(review): not produced by html5_parser.c; presumably a setup failure */
+ LEXBOR_LIBXML2_BRIDGE_STATUS_FATAL_PARSE, /* NOTE(review): not produced by html5_parser.c; presumably an unrecoverable parse error */
+ LEXBOR_LIBXML2_BRIDGE_STATUS_OVERFLOW, /* a text node is INT_MAX bytes or longer */
+ LEXBOR_LIBXML2_BRIDGE_STATUS_OOM, /* a libxml2 allocation failed */
+} lexbor_libxml2_bridge_status;
+/* Callback types used by lexbor_libxml2_bridge_report_errors(). Both receive the context's
+ * application_data first. The tokenizer reporter gets the error and an offset into the input;
+ * the tree reporter gets the error, a 1-based line and column, and the error's length. */
+typedef void (*lexbor_libxml2_bridge_tokenizer_error_reporter)(void *application_data, lxb_html_tokenizer_error_t *error, size_t offset);
+typedef void (*lexbor_libxml2_bridge_tree_error_reporter)(void *application_data, lxb_html_tree_error_t *error, size_t line, size_t column, size_t len);
+/* Flags copied out of the lexbor tree by lexbor_libxml2_bridge_copy_observations(). */
+typedef struct {
+ bool has_explicit_html_tag; /* copy of the tree's has_explicit_html_tag */
+ bool has_explicit_head_tag; /* copy of the tree's has_explicit_head_tag */
+ bool has_explicit_body_tag; /* copy of the tree's has_explicit_body_tag */
+} lexbor_libxml2_bridge_extracted_observations;
+/* State shared between the caller and the bridge while parsing one document.
+ * Initialize with lexbor_libxml2_bridge_parse_context_init(), which zeroes every field. */
+typedef struct {
+ /* Private fields */
+ lexbor_libxml2_bridge_tokenizer_error_reporter tokenizer_error_reporter; /* NULL means tokenizer errors are not reported */
+ lexbor_libxml2_bridge_tree_error_reporter tree_error_reporter; /* NULL means tree errors are not reported */
+ /* Public fields */
+ lexbor_libxml2_bridge_extracted_observations observations;
+ /* Application data, do what you want with this */
+ void *application_data;
+} lexbor_libxml2_bridge_parse_context;
+/* Public bridge API, in declaration order: zero a parse context; install the two error
+ * reporters; convert a parsed lexbor document into a new libxml2 document (stored in *doc_out
+ * on success); forward newly collected parse errors to the reporters and advance the two
+ * error index offsets; copy the explicit html/head/body tag flags out of the lexbor tree. */
+void lexbor_libxml2_bridge_parse_context_init(lexbor_libxml2_bridge_parse_context *ctx);
+void lexbor_libxml2_bridge_parse_set_error_callbacks(lexbor_libxml2_bridge_parse_context *ctx, lexbor_libxml2_bridge_tokenizer_error_reporter tokenizer_error_reporter, lexbor_libxml2_bridge_tree_error_reporter tree_error_reporter);
+lexbor_libxml2_bridge_status lexbor_libxml2_bridge_convert_document(lxb_html_document_t *document, xmlDocPtr *doc_out, bool compact_text_nodes, bool create_default_ns);
+void lexbor_libxml2_bridge_report_errors(const lexbor_libxml2_bridge_parse_context *ctx, lxb_html_parser_t *parser, const lxb_char_t *input_html, size_t chunk_offset, size_t *error_index_offset_tokenizer, size_t *error_index_offset_tree);
+void lexbor_libxml2_bridge_copy_observations(lxb_html_tree_t *tree, lexbor_libxml2_bridge_extracted_observations *observations);
+
+#endif
diff --git a/ext/dom/html5_serializer.c b/ext/dom/html5_serializer.c
new file mode 100644
index 0000000000000..daa2e0ce2ec0a
--- /dev/null
+++ b/ext/dom/html5_serializer.c
@@ -0,0 +1,351 @@
+/*
+ +----------------------------------------------------------------------+
+ | Copyright (c) The PHP Group |
+ +----------------------------------------------------------------------+
+ | This source file is subject to version 3.01 of the PHP license, |
+ | that is bundled with this package in the file LICENSE, and is |
+ | available through the world-wide-web at the following url: |
+ | https://www.php.net/license/3_01.txt |
+ | If you did not receive a copy of the PHP license and are unable to |
+ | obtain it through the world-wide-web, please send a note to |
+ | license@php.net so we can mail you a copy immediately. |
+ +----------------------------------------------------------------------+
+ | Authors: Niels Dossche |
+ +----------------------------------------------------------------------+
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "php.h"
+#if defined(HAVE_LIBXML) && defined(HAVE_DOM)
+#include "php_dom.h"
+#include "html5_serializer.h"
+#include "namespace_compat.h"
+#include <libxml/tree.h>
+
+#define TRY(x) do { if (UNEXPECTED((x) != SUCCESS)) { return FAILURE; } } while (0)
+
+/* Returns true iff the node has a namespace whose URI equals `uri` exactly.
+ * A node without any namespace never matches. */
+static bool dom_is_ns(const xmlNode *node, const char *uri)
+{
+	if (node->ns == NULL) {
+		return false;
+	}
+	return strcmp((const char *) node->ns->href, uri) == 0;
+}
+
+/* Returns true iff the node counts as being in the HTML namespace:
+ * either it has no namespace at all, or its namespace URI is the XHTML one. */
+static bool dom_is_html_ns(const xmlNode *node)
+{
+	if (node->ns == NULL) {
+		return true;
+	}
+	return dom_is_ns(node, DOM_XHTML_NS_URI);
+}
+
+/* Compares the node's name against `tag`.
+ * `name_length` is the precomputed length of node->name, so that callers testing
+ * many tags against the same node only need to measure the name once. */
+static bool dom_local_name_compare_ex(const xmlNode *node, const char *tag, size_t tag_length, size_t name_length)
+{
+	/* Different lengths can never be equal; skip the byte comparison. */
+	if (name_length != tag_length) {
+		return false;
+	}
+	return zend_binary_strcmp((const char *) node->name, name_length, tag, tag_length) == 0;
+}
+
+/* Serializes a doctype as "<!DOCTYPE " followed by the DTD name and ">".
+ * Returns FAILURE as soon as one of the writer callbacks does not report SUCCESS. */
+static zend_result dom_html5_serialize_doctype(dom_html5_serialize_context *ctx, const xmlDtd *dtd)
+{
+	TRY(ctx->write_string_len(ctx->application_data, "<!DOCTYPE ", strlen("<!DOCTYPE ")));
+	TRY(ctx->write_string(ctx->application_data, (const char *) dtd->name));
+	return ctx->write_string_len(ctx->application_data, ">", strlen(">"));
+}
+
+/* Serializes a comment node as "<!--" followed by its data and "-->".
+ * Returns FAILURE as soon as one of the writer callbacks does not report SUCCESS. */
+static zend_result dom_html5_serialize_comment(dom_html5_serialize_context *ctx, const xmlNode *node)
+{
+	TRY(ctx->write_string_len(ctx->application_data, "<!--", strlen("<!--")));
+	TRY(ctx->write_string(ctx->application_data, (const char *) node->content));
+	return ctx->write_string_len(ctx->application_data, "-->", strlen("-->"));
+}
+
+/* Serializes a processing instruction as "<?", the target, a single space, the data, and ">".
+ * Returns FAILURE as soon as one of the writer callbacks does not report SUCCESS. */
+static zend_result dom_html5_serialize_processing_instruction(dom_html5_serialize_context *ctx, const xmlNode *node)
+{
+	TRY(ctx->write_string_len(ctx->application_data, "<?", strlen("<?")));
+	TRY(ctx->write_string(ctx->application_data, (const char *) node->name));
+	TRY(ctx->write_string_len(ctx->application_data, " ", strlen(" ")));
+	TRY(ctx->write_string(ctx->application_data, (const char *) node->content));
+	return ctx->write_string_len(ctx->application_data, ">", strlen(">"));
+}
+
+/* https://html.spec.whatwg.org/multipage/parsing.html#escapingString
+ *
+ * Writes `content` (a NUL-terminated UTF-8 string) through the context's writer,
+ * replacing the characters that the HTML escaping algorithm requires:
+ *   - '&'                 becomes "&amp;"   (always)
+ *   - U+00A0 (C2 A0)      becomes "&nbsp;"  (always)
+ *   - '"'                 becomes "&quot;"  (attribute mode only)
+ *   - '<' and '>'         become "&lt;" and "&gt;" (non-attribute mode only)
+ * Unescaped runs are flushed in one write each instead of byte by byte.
+ * Returns FAILURE as soon as a writer callback does not report SUCCESS. */
+static zend_result dom_html5_escape_string(dom_html5_serialize_context *ctx, const char *content, bool attribute_mode)
+{
+	/* Start of the pending run of bytes that need no escaping. */
+	const char *last_output = content;
+
+	while (*content != '\0') {
+		const char *entity = NULL;
+		size_t consumed = 1;
+
+		switch (*content) {
+			/* Step 1 */
+			case '&':
+				entity = "&amp;";
+				break;
+
+			/* Step 2 (non-breaking space) (note: uses UTF-8 internally).
+			 * Reading content[1] is safe: content[0] is not the terminator,
+			 * so content[1] is at worst the terminator itself. */
+			case '\xC2':
+				if (content[1] == '\xA0') {
+					entity = "&nbsp;";
+					consumed = 2; /* Consume A0 too */
+				}
+				break;
+
+			/* Step 3 */
+			case '"':
+				if (attribute_mode) {
+					entity = "&quot;";
+				}
+				break;
+
+			/* Step 4 */
+			case '<':
+				if (!attribute_mode) {
+					entity = "&lt;";
+				}
+				break;
+			case '>':
+				if (!attribute_mode) {
+					entity = "&gt;";
+				}
+				break;
+
+			default:
+				break;
+		}
+
+		if (entity != NULL) {
+			/* Flush what precedes the special character, then emit its replacement. */
+			TRY(ctx->write_string_len(ctx->application_data, last_output, content - last_output));
+			TRY(ctx->write_string_len(ctx->application_data, entity, strlen(entity)));
+			last_output = content + consumed;
+		}
+
+		content += consumed;
+	}
+
+	/* Flush the trailing unescaped run (possibly empty). */
+	return ctx->write_string_len(ctx->application_data, last_output, content - last_output);
+}
+
+/* Serializes a text (or CDATA) node.
+ * Text whose parent is one of the raw-text HTML elements listed below is written verbatim;
+ * all other text goes through the escaping algorithm in non-attribute mode. */
+static zend_result dom_html5_serialize_text_node(dom_html5_serialize_context *ctx, const xmlNode *node)
+{
+	/* Note: <noscript> is not handled because scripting is not enabled because the user agent (PHP) does not support (JS) scripting */
+	static const char *const verbatim_parents[] = {
+		"style", "script", "xmp", "iframe", "noembed", "noframes", "plaintext",
+	};
+	const xmlNode *parent = node->parent;
+
+	if (parent->type == XML_ELEMENT_NODE && dom_is_html_ns(parent)) {
+		size_t name_length = strlen((const char *) parent->name);
+		for (size_t i = 0; i < sizeof(verbatim_parents) / sizeof(verbatim_parents[0]); i++) {
+			const char *tag = verbatim_parents[i];
+			if (dom_local_name_compare_ex(parent, tag, strlen(tag), name_length)) {
+				return ctx->write_string(ctx->application_data, (const char *) node->content);
+			}
+		}
+	}
+
+	return dom_html5_escape_string(ctx, (const char *) node->content, false);
+}
+
+/* Writes the tag name of an element.
+ * The namespace prefix (followed by ':') is only emitted when the element has a prefixed
+ * namespace that is none of HTML, MathML or SVG; otherwise only the local name is written. */
+static zend_result dom_html5_serialize_element_tag_name(dom_html5_serialize_context *ctx, const xmlNode *node)
+{
+	/* Note: it is not the serializer's responsibility to care about uppercase/lowercase (see createElement() note) */
+	bool emit_prefix = node->ns != NULL
+		&& node->ns->prefix != NULL
+		&& !dom_is_html_ns(node)
+		&& !dom_is_ns(node, DOM_MATHML_NS_URI)
+		&& !dom_is_ns(node, DOM_SVG_NS_URI);
+
+	if (emit_prefix) {
+		TRY(ctx->write_string(ctx->application_data, (const char *) node->ns->prefix));
+		TRY(ctx->write_string_len(ctx->application_data, ":", strlen(":")));
+	}
+
+	return ctx->write_string(ctx->application_data, (const char *) node->name);
+}
+
+/* Writes the serialized name of an attribute:
+ *   - no namespace: the local name
+ *   - XML namespace: "xml:" + local name
+ *   - XMLNS namespace: "xmlns" if the local name is "xmlns", otherwise "xmlns:" + local name
+ *   - XLink namespace: "xlink:" + local name
+ *   - any other namespace: prefix + ":" + local name, or just the local name without a prefix */
+static zend_result dom_html5_serialize_attribute_name(dom_html5_serialize_context *ctx, const xmlAttr *attr)
+{
+	const xmlNode *attr_as_node = (const xmlNode *) attr;
+
+	if (attr->ns == NULL) {
+		return ctx->write_string(ctx->application_data, (const char *) attr->name);
+	}
+
+	if (dom_is_ns(attr_as_node, DOM_XML_NS_URI)) {
+		TRY(ctx->write_string_len(ctx->application_data, "xml:", strlen("xml:")));
+	} else if (dom_is_ns(attr_as_node, DOM_XMLNS_NS_URI)) {
+		/* Compatibility for real attributes */
+		if (strcmp((const char *) attr->name, "xmlns") == 0) {
+			return ctx->write_string_len(ctx->application_data, "xmlns", strlen("xmlns"));
+		}
+		TRY(ctx->write_string_len(ctx->application_data, "xmlns:", strlen("xmlns:")));
+	} else if (dom_is_ns(attr_as_node, DOM_XLINK_NS_URI)) {
+		TRY(ctx->write_string_len(ctx->application_data, "xlink:", strlen("xlink:")));
+	} else if (attr->ns->prefix != NULL) {
+		TRY(ctx->write_string(ctx->application_data, (const char *) attr->ns->prefix));
+		TRY(ctx->write_string_len(ctx->application_data, ":", strlen(":")));
+	}
+
+	return ctx->write_string(ctx->application_data, (const char *) attr->name);
+}
+
+/* Writes the start tag of an element: '<', the tag name, the namespace declarations that
+ * are flagged as also being attributes, every attribute with its escaped value, and '>'.
+ * Returns FAILURE as soon as a writer callback does not report SUCCESS. */
+static zend_result dom_html5_serialize_element_start(dom_html5_serialize_context *ctx, const xmlNode *node)
+{
+	TRY(ctx->write_string_len(ctx->application_data, "<", strlen("<")));
+	TRY(dom_html5_serialize_element_tag_name(ctx, node));
+
+	/* We don't support the "is" value during element creation, so no handling here. */
+
+	/* Some namespace declarations are also attributes (see https://html.spec.whatwg.org/multipage/parsing.html#create-an-element-for-the-token) */
+	const xmlNs *decl = node->nsDef;
+	while (decl != NULL) {
+		if (dom_ns_is_also_an_attribute(decl)) {
+			if (decl->prefix == NULL) {
+				TRY(ctx->write_string_len(ctx->application_data, " xmlns=\"", strlen(" xmlns=\"")));
+			} else {
+				TRY(ctx->write_string_len(ctx->application_data, " xmlns:", strlen(" xmlns:")));
+				TRY(ctx->write_string(ctx->application_data, (const char *) decl->prefix));
+				TRY(ctx->write_string_len(ctx->application_data, "=\"", strlen("=\"")));
+			}
+			TRY(ctx->write_string(ctx->application_data, (const char *) decl->href));
+			TRY(ctx->write_string_len(ctx->application_data, "\"", strlen("\"")));
+		}
+		decl = decl->next;
+	}
+
+	const xmlAttr *attr = node->properties;
+	while (attr != NULL) {
+		TRY(ctx->write_string_len(ctx->application_data, " ", strlen(" ")));
+		TRY(dom_html5_serialize_attribute_name(ctx, attr));
+		TRY(ctx->write_string_len(ctx->application_data, "=\"", strlen("=\"")));
+
+		xmlChar *value = xmlNodeGetContent((const xmlNode *) attr);
+		if (value != NULL) {
+			/* Free the copy before bailing out so a failed write cannot leak it. */
+			zend_result escape_result = dom_html5_escape_string(ctx, (const char *) value, true);
+			xmlFree(value);
+			TRY(escape_result);
+		}
+
+		TRY(ctx->write_string_len(ctx->application_data, "\"", strlen("\"")));
+		attr = attr->next;
+	}
+
+	/* Note: "continue on to the next child if the element is void" is handled in the iteration and dom_html5_serialize_element_end() */
+	return ctx->write_string_len(ctx->application_data, ">", strlen(">"));
+}
+
+/* https://html.spec.whatwg.org/multipage/syntax.html#void-elements
+ * https://html.spec.whatwg.org/multipage/parsing.html#serializes-as-void
+ *
+ * Returns true iff the node is in the HTML namespace and its name is one of the
+ * elements that serialize without children and without an end tag. */
+static bool dom_html5_serializes_as_void(const xmlNode *node)
+{
+	static const char *const void_names[] = {
+		/* These are the void elements from https://html.spec.whatwg.org/multipage/syntax.html#void-elements */
+		"area", "base", "br", "col", "embed", "hr", "img", "input",
+		"link", "meta", "source", "track", "wbr",
+		/* These are the additional names from https://html.spec.whatwg.org/multipage/parsing.html#serializes-as-void */
+		"basefont", "bgsound", "frame", "keygen", "param",
+	};
+
+	if (!dom_is_html_ns(node)) {
+		return false;
+	}
+
+	size_t name_length = strlen((const char *) node->name);
+	for (size_t i = 0; i < sizeof(void_names) / sizeof(void_names[0]); i++) {
+		const char *candidate = void_names[i];
+		if (dom_local_name_compare_ex(node, candidate, strlen(candidate), name_length)) {
+			return true;
+		}
+	}
+	return false;
+}
+
+/* Writes the end tag of an element: "</", the tag name, and ">".
+ * Elements that serialize as void get no end tag; in that case nothing is written. */
+static zend_result dom_html5_serialize_element_end(dom_html5_serialize_context *ctx, const xmlNode *node)
+{
+	if (dom_html5_serializes_as_void(node)) {
+		return SUCCESS;
+	}
+
+	TRY(ctx->write_string_len(ctx->application_data, "</", strlen("</")));
+	TRY(dom_html5_serialize_element_tag_name(ctx, node));
+	return ctx->write_string_len(ctx->application_data, ">", strlen(">"));
+}
+
+/* https://html.spec.whatwg.org/multipage/parsing.html#html-fragment-serialisation-algorithm
+ *
+ * Serializes `node`, its following siblings, and all their descendants in document order.
+ * The traversal is iterative (no recursion): it descends through ->children, advances
+ * through ->next, and climbs back through ->parent, emitting end tags on the way up.
+ * `bound` is the node at which climbing stops; the caller passes the parent of `node`.
+ * Returns FAILURE as soon as any of the serialization helpers fails, SUCCESS otherwise. */
+static zend_result dom_html5_serialize_node(dom_html5_serialize_context *ctx, const xmlNode *node, const xmlNode *bound)
+{
+ while (node != NULL) {
+ switch (node->type) {
+ case XML_DTD_NODE: {
+ TRY(dom_html5_serialize_doctype(ctx, (const xmlDtd *) node));
+ break;
+ }
+
+ /* CDATA sections are serialized the same way as text nodes. */
+ case XML_CDATA_SECTION_NODE:
+ case XML_TEXT_NODE: {
+ TRY(dom_html5_serialize_text_node(ctx, node));
+ break;
+ }
+
+ case XML_PI_NODE: {
+ TRY(dom_html5_serialize_processing_instruction(ctx, node));
+ break;
+ }
+
+ case XML_COMMENT_NODE: {
+ TRY(dom_html5_serialize_comment(ctx, node));
+ break;
+ }
+
+ case XML_ELEMENT_NODE: {
+ TRY(dom_html5_serialize_element_start(ctx, node));
+ if (node->children) {
+ /* Children of a void element are skipped entirely; no end tag is written for it either. */
+ if (!dom_html5_serializes_as_void(node)) {
+ node = node->children;
+ continue;
+ }
+ } else {
+ /* Not descended, so wouldn't put the closing tag as it's normally only done when going back upwards. */
+ TRY(dom_html5_serialize_element_end(ctx, node));
+ }
+ break;
+ }
+
+ /* Any other node type produces no output. */
+ default:
+ break;
+ }
+
+ if (node->next) {
+ node = node->next;
+ } else {
+ /* Go upwards, until we find a parent node with a next sibling, or until we hit the bound. */
+ do {
+ node = node->parent;
+ /* The bound itself is never closed: only its children are being serialized. */
+ if (node == bound) {
+ return SUCCESS;
+ }
+ if (node->type == XML_ELEMENT_NODE) {
+ TRY(dom_html5_serialize_element_end(ctx, node));
+ }
+ } while (node->next == NULL);
+ node = node->next;
+ }
+ }
+
+ return SUCCESS;
+}
+
+/* https://html.spec.whatwg.org/multipage/parsing.html#serialising-html-fragments
+ * Note: this serializes the _children_, excluding the node itself!
+ *
+ * Nodes that cannot have children, and elements that serialize as void, produce no
+ * output and yield SUCCESS. Otherwise the result of serializing the children is returned. */
+zend_result dom_html5_serialize(dom_html5_serialize_context *ctx, const xmlNode *node)
+{
+	/* Step 1. Note that this algorithm serializes children. Only elements, documents, and fragments can have children. */
+	switch (node->type) {
+		case XML_ELEMENT_NODE:
+			if (dom_html5_serializes_as_void(node)) {
+				return SUCCESS;
+			}
+			break;
+
+		case XML_DOCUMENT_FRAG_NODE:
+		case XML_DOCUMENT_NODE:
+		case XML_HTML_DOCUMENT_NODE:
+			break;
+
+		default:
+			return SUCCESS;
+	}
+
+	/* Step 2 not needed because we're not using a string to store the serialized data */
+	/* Step 3 not needed because we don't support template contents yet */
+
+	/* Step 4 */
+	return dom_html5_serialize_node(ctx, node->children, node);
+}
+
+#endif /* HAVE_LIBXML && HAVE_DOM */
diff --git a/ext/dom/html5_serializer.h b/ext/dom/html5_serializer.h
new file mode 100644
index 0000000000000..a7eb4ee9be0c1
--- /dev/null
+++ b/ext/dom/html5_serializer.h
@@ -0,0 +1,31 @@
+/*
+ +----------------------------------------------------------------------+
+ | Copyright (c) The PHP Group |
+ +----------------------------------------------------------------------+
+ | This source file is subject to version 3.01 of the PHP license, |
+ | that is bundled with this package in the file LICENSE, and is |
+ | available through the world-wide-web at the following url: |
+ | https://www.php.net/license/3_01.txt |
+ | If you did not receive a copy of the PHP license and are unable to |
+ | obtain it through the world-wide-web, please send a note to |
+ | license@php.net so we can mail you a copy immediately. |
+ +----------------------------------------------------------------------+
+ | Authors: Niels Dossche |
+ +----------------------------------------------------------------------+
+*/
+
+#ifndef HTML5_SERIALIZER_H
+#define HTML5_SERIALIZER_H
+
+#include <Zend/zend_types.h>
+#include <libxml/tree.h>
+
+/* Output sink handed to dom_html5_serialize().
+ * The serializer produces its output exclusively through these two callbacks and
+ * aborts with FAILURE as soon as one of them returns anything other than SUCCESS. */
+typedef struct {
+ /* Writes a NUL-terminated string. */
+ zend_result (*write_string)(void *application_data, const char *buf);
+ /* Writes exactly `len` bytes starting at `buf`. */
+ zend_result (*write_string_len)(void *application_data, const char *buf, size_t len);
+ /* Opaque pointer passed unchanged as the first argument of both callbacks. */
+ void *application_data;
+} dom_html5_serialize_context;
+
+zend_result dom_html5_serialize(dom_html5_serialize_context *ctx, const xmlNode *node);
+
+#endif
diff --git a/ext/dom/namespace_compat.c b/ext/dom/namespace_compat.c
new file mode 100644
index 0000000000000..efd51cd6545d6
--- /dev/null
+++ b/ext/dom/namespace_compat.c
@@ -0,0 +1,54 @@
+/*
+ +----------------------------------------------------------------------+
+ | Copyright (c) The PHP Group |
+ +----------------------------------------------------------------------+
+ | This source file is subject to version 3.01 of the PHP license, |
+ | that is bundled with this package in the file LICENSE, and is |
+ | available through the world-wide-web at the following url: |
+ | https://www.php.net/license/3_01.txt |
+ | If you did not receive a copy of the PHP license and are unable to |
+ | obtain it through the world-wide-web, please send a note to |
+ | license@php.net so we can mail you a copy immediately. |
+ +----------------------------------------------------------------------+
+ | Authors: Niels Dossche |
+ +----------------------------------------------------------------------+
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "php.h"
+#if defined(HAVE_LIBXML) && defined(HAVE_DOM)
+#include "php_dom.h"
+#include "namespace_compat.h"
+
+/* Returns true iff the namespace declaration carries the "also an attribute" flag,
+ * which is stored as a non-NULL value in the declaration's _private field. */
+bool dom_ns_is_also_an_attribute(const xmlNs *ns) {
+	if (ns->_private == NULL) {
+		return false;
+	}
+	return true;
+}
+
+/* Flags a namespace declaration as also being visible as an attribute.
+ * The flag is a non-NULL sentinel stored in _private; it is tested by dom_ns_is_also_an_attribute(). */
+void dom_ns_compat_mark_attribute(xmlNsPtr ns) {
+ ns->_private = (void *) 1;
+}
+
+/* Flags every namespace declaration in the list starting at `ns` as also being an attribute.
+ * A NULL list is accepted and results in no work. */
+void dom_ns_compat_mark_attribute_list(xmlNsPtr ns) {
+	for (xmlNsPtr current = ns; current != NULL; current = current->next) {
+		dom_ns_compat_mark_attribute(current);
+	}
+}
+
+/* Transfers the "also an attribute" flag from each declaration in `original` to the
+ * declaration at the same position in `copy`. Existing flags on `copy` are never cleared. */
+void dom_ns_compat_copy_attribute_list_mark(xmlNsPtr copy, const xmlNs *original) {
+	/* It's possible that the original list is shorter than the copy list
+	 * because of additional namespace copies from within a fragment. */
+	for (; original != NULL; original = original->next, copy = copy->next) {
+		ZEND_ASSERT(copy != NULL);
+		if (dom_ns_is_also_an_attribute(original)) {
+			dom_ns_compat_mark_attribute(copy);
+		}
+	}
+}
+
+#endif /* HAVE_LIBXML && HAVE_DOM */
diff --git a/ext/dom/namespace_compat.h b/ext/dom/namespace_compat.h
new file mode 100644
index 0000000000000..ab514a08bbada
--- /dev/null
+++ b/ext/dom/namespace_compat.h
@@ -0,0 +1,39 @@
+/*
+ +----------------------------------------------------------------------+
+ | Copyright (c) The PHP Group |
+ +----------------------------------------------------------------------+
+ | This source file is subject to version 3.01 of the PHP license, |
+ | that is bundled with this package in the file LICENSE, and is |
+ | available through the world-wide-web at the following url: |
+ | https://www.php.net/license/3_01.txt |
+ | If you did not receive a copy of the PHP license and are unable to |
+ | obtain it through the world-wide-web, please send a note to |
+ | license@php.net so we can mail you a copy immediately. |
+ +----------------------------------------------------------------------+
+ | Authors: Niels Dossche |
+ +----------------------------------------------------------------------+
+*/
+
+#ifndef NAMESPACE_COMPAT_H
+#define NAMESPACE_COMPAT_H
+
+#include <libxml/tree.h>
+
+/* https://infra.spec.whatwg.org/#namespaces */
+#define DOM_XHTML_NS_URI "http://www.w3.org/1999/xhtml"
+#define DOM_MATHML_NS_URI "http://www.w3.org/1998/Math/MathML"
+#define DOM_SVG_NS_URI "http://www.w3.org/2000/svg"
+#define DOM_XLINK_NS_URI "http://www.w3.org/1999/xlink"
+#define DOM_XML_NS_URI "http://www.w3.org/XML/1998/namespace"
+#define DOM_XMLNS_NS_URI "http://www.w3.org/2000/xmlns/"
+
+/* These functions make it possible to make a namespace declaration also visible as an attribute by
+ * setting a flag that can be checked with dom_ns_is_also_an_attribute().
+ * This is used in the serializer for example. */
+
+bool dom_ns_is_also_an_attribute(const xmlNs *ns);
+void dom_ns_compat_mark_attribute(xmlNsPtr ns);
+void dom_ns_compat_mark_attribute_list(xmlNsPtr ns);
+void dom_ns_compat_copy_attribute_list_mark(xmlNsPtr copy, const xmlNs *original);
+
+#endif
diff --git a/ext/dom/node.c b/ext/dom/node.c
index cd62565df884d..d053fbceb065f 100644
--- a/ext/dom/node.c
+++ b/ext/dom/node.c
@@ -1329,7 +1329,7 @@ PHP_METHOD(DOMNode, cloneNode)
DOM_GET_OBJ(n, id, xmlNodePtr, intern);
- node = dom_clone_node(n, n->doc, recursive);
+ node = dom_clone_node(n, n->doc, intern, recursive);
if (!node) {
RETURN_FALSE;
diff --git a/ext/dom/php_dom.c b/ext/dom/php_dom.c
index 99d2f7fdd1d2a..2fcdae09709b1 100644
--- a/ext/dom/php_dom.c
+++ b/ext/dom/php_dom.c
@@ -27,6 +27,8 @@
#include "php_dom_arginfo.h"
#include "dom_properties.h"
#include "zend_interfaces.h"
+#include "lexbor/lexbor/core/types.h"
+#include "lexbor/lexbor/core/lexbor.h"
#include "ext/standard/info.h"
#define PHP_XPATH 1
@@ -40,6 +42,7 @@ PHP_DOM_EXPORT zend_class_entry *dom_childnode_class_entry;
PHP_DOM_EXPORT zend_class_entry *dom_domimplementation_class_entry;
PHP_DOM_EXPORT zend_class_entry *dom_documentfragment_class_entry;
PHP_DOM_EXPORT zend_class_entry *dom_document_class_entry;
+PHP_DOM_EXPORT zend_class_entry *dom_html5_document_class_entry;
PHP_DOM_EXPORT zend_class_entry *dom_nodelist_class_entry;
PHP_DOM_EXPORT zend_class_entry *dom_namednodemap_class_entry;
PHP_DOM_EXPORT zend_class_entry *dom_characterdata_class_entry;
@@ -70,6 +73,7 @@ zend_object_handlers dom_xpath_object_handlers;
static HashTable classes;
/* {{{ prop handler tables */
static HashTable dom_document_prop_handlers;
+static HashTable dom_html5_document_prop_handlers;
static HashTable dom_documentfragment_prop_handlers;
static HashTable dom_node_prop_handlers;
static HashTable dom_nodelist_prop_handlers;
@@ -206,6 +210,7 @@ static void dom_copy_doc_props(php_libxml_ref_obj *source_doc, php_libxml_ref_ob
zend_hash_copy(dest->classmap, source->classmap, NULL);
}
+ dest_doc->is_html5_class = source_doc->is_html5_class;
}
}
@@ -586,6 +591,22 @@ static zend_object *dom_objects_store_clone_obj(zend_object *zobject);
void dom_xpath_objects_free_storage(zend_object *object);
#endif
+/* Allocator adapters: thin function wrappers around emalloc/erealloc/ecalloc/efree.
+ * They are registered with lexbor through lexbor_memory_setup() in PHP_MINIT_FUNCTION(dom). */
+
+/* Allocates `size` bytes via emalloc(). */
+static void *dom_malloc(size_t size) {
+ return emalloc(size);
+}
+
+/* Resizes the block at `dst` to `size` bytes via erealloc(). */
+static void *dom_realloc(void *dst, size_t size) {
+ return erealloc(dst, size);
+}
+
+/* Allocates an array of `num` elements of `size` bytes each via ecalloc(). */
+static void *dom_calloc(size_t num, size_t size) {
+ return ecalloc(num, size);
+}
+
+/* Releases a block via efree(). */
+static void dom_free(void *ptr) {
+ efree(ptr);
+}
+
/* {{{ PHP_MINIT_FUNCTION(dom) */
PHP_MINIT_FUNCTION(dom)
{
@@ -705,6 +726,13 @@ PHP_MINIT_FUNCTION(dom)
zend_hash_merge(&dom_document_prop_handlers, &dom_node_prop_handlers, dom_copy_prop_handler, 0);
zend_hash_add_ptr(&classes, dom_document_class_entry->name, &dom_document_prop_handlers);
+ dom_html5_document_class_entry = register_class_DOM_HTML5Document(dom_document_class_entry);
+ dom_document_class_entry->create_object = dom_objects_new;
+ zend_hash_init(&dom_html5_document_prop_handlers, 0, NULL, dom_dtor_prop_handler, 1);
+ dom_register_prop_handler(&dom_html5_document_prop_handlers, "encoding", sizeof("encoding")-1, dom_document_encoding_read, dom_html5_document_encoding_write);
+ zend_hash_merge(&dom_html5_document_prop_handlers, &dom_document_prop_handlers, dom_copy_prop_handler, 0);
+ zend_hash_add_ptr(&classes, dom_html5_document_class_entry->name, &dom_html5_document_prop_handlers);
+
dom_nodelist_class_entry = register_class_DOMNodeList(zend_ce_aggregate, zend_ce_countable);
dom_nodelist_class_entry->create_object = dom_nnodemap_objects_new;
dom_nodelist_class_entry->default_object_handlers = &dom_nodelist_object_handlers;
@@ -845,6 +873,8 @@ PHP_MINIT_FUNCTION(dom)
php_libxml_register_export(dom_node_class_entry, php_dom_export_node);
+ lexbor_memory_setup(dom_malloc, dom_realloc, dom_calloc, dom_free);
+
return SUCCESS;
}
/* }}} */
@@ -876,6 +906,7 @@ PHP_MINFO_FUNCTION(dom)
PHP_MSHUTDOWN_FUNCTION(dom) /* {{{ */
{
zend_hash_destroy(&dom_document_prop_handlers);
+ zend_hash_destroy(&dom_html5_document_prop_handlers);
zend_hash_destroy(&dom_documentfragment_prop_handlers);
zend_hash_destroy(&dom_node_prop_handlers);
zend_hash_destroy(&dom_namespace_node_prop_handlers);
@@ -1174,7 +1205,11 @@ PHP_DOM_EXPORT bool php_dom_create_object(xmlNodePtr obj, zval *return_value, do
case XML_DOCUMENT_NODE:
case XML_HTML_DOCUMENT_NODE:
{
- ce = dom_document_class_entry;
+ if (domobj && domobj->document->is_html5_class) {
+ ce = dom_html5_document_class_entry;
+ } else {
+ ce = dom_document_class_entry;
+ }
break;
}
case XML_DTD_NODE:
@@ -1819,14 +1854,23 @@ static int dom_nodemap_has_dimension(zend_object *object, zval *member, int chec
return offset >= 0 && offset < php_dom_get_namednodemap_length(php_dom_obj_from_obj(object));
} /* }}} end dom_nodemap_has_dimension */
-xmlNodePtr dom_clone_node(xmlNodePtr node, xmlDocPtr doc, bool recursive)
+xmlNodePtr dom_clone_node(xmlNodePtr node, xmlDocPtr doc, const dom_object *intern, bool recursive)
{
/* See http://www.xmlsoft.org/html/libxml-tree.html#xmlDocCopyNode for meaning of values */
int extended_recursive = recursive;
if (!recursive && node->type == XML_ELEMENT_NODE) {
extended_recursive = 2;
}
- return xmlDocCopyNode(node, doc, extended_recursive);
+ xmlNodePtr copy = xmlDocCopyNode(node, doc, extended_recursive);
+ if (UNEXPECTED(!copy)) {
+ return NULL;
+ }
+
+ if (intern->document && intern->document->is_html5_class) {
+ dom_mark_namespaces_for_copy_based_on_copy(copy, node);
+ }
+
+ return copy;
}
#endif /* HAVE_DOM */
diff --git a/ext/dom/php_dom.h b/ext/dom/php_dom.h
index df13dcef0d6cc..d212ca0e61c5f 100644
--- a/ext/dom/php_dom.h
+++ b/ext/dom/php_dom.h
@@ -114,6 +114,8 @@ static inline dom_object_namespace_node *php_dom_namespace_node_obj_from_obj(zen
#include "domexception.h"
+#define DOM_HTML_NO_DEFAULT_NS (1U << 31)
+
dom_object *dom_object_get_data(xmlNodePtr obj);
dom_doc_propsptr dom_get_doc_props(php_libxml_ref_obj *document);
libxml_doc_props const* dom_get_doc_props_read_only(const php_libxml_ref_obj *document);
@@ -154,6 +156,13 @@ bool php_dom_adopt_node(xmlNodePtr nodep, dom_object *dom_object_new_document, x
xmlNsPtr dom_get_ns_resolve_prefix_conflict(xmlNodePtr tree, const char *uri);
void php_dom_reconcile_attribute_namespace_after_insertion(xmlAttrPtr attrp);
+void php_dom_document_constructor(INTERNAL_FUNCTION_PARAMETERS);
+
+#define DOM_LOAD_STRING 0
+#define DOM_LOAD_FILE 1
+
+void dom_parse_document(INTERNAL_FUNCTION_PARAMETERS, int mode, xmlDocPtr *doc_out);
+
/* parentnode */
void dom_parent_node_prepend(dom_object *context, zval *nodes, uint32_t nodesc);
void dom_parent_node_append(dom_object *context, zval *nodes, uint32_t nodesc);
@@ -174,7 +183,8 @@ void php_dom_nodelist_get_item_into_zval(dom_nnodemap_object *objmap, zend_long
int php_dom_get_namednodemap_length(dom_object *obj);
int php_dom_get_nodelist_length(dom_object *obj);
-xmlNodePtr dom_clone_node(xmlNodePtr node, xmlDocPtr doc, bool recursive);
+xmlNodePtr dom_clone_node(xmlNodePtr node, xmlDocPtr doc, const dom_object *intern, bool recursive);
+void dom_mark_namespaces_for_copy_based_on_copy(xmlNodePtr copy, const xmlNode *original);
#define DOM_GET_INTERN(__id, __intern) { \
__intern = Z_DOMOBJ_P(__id); \
diff --git a/ext/dom/php_dom.stub.php b/ext/dom/php_dom.stub.php
index cf4fda78a4bea..291d1a747291a 100644
--- a/ext/dom/php_dom.stub.php
+++ b/ext/dom/php_dom.stub.php
@@ -2,1011 +2,1040 @@
/** @generate-class-entries */
-/**
- * @var int
- * @cvalue XML_ELEMENT_NODE
- */
-const XML_ELEMENT_NODE = UNKNOWN;
-/**
- * @var int
- * @cvalue XML_ATTRIBUTE_NODE
- */
-const XML_ATTRIBUTE_NODE = UNKNOWN;
-/**
- * @var int
- * @cvalue XML_TEXT_NODE
- */
-const XML_TEXT_NODE = UNKNOWN;
-/**
- * @var int
- * @cvalue XML_CDATA_SECTION_NODE
- */
-const XML_CDATA_SECTION_NODE = UNKNOWN;
-/**
- * @var int
- * @cvalue XML_ENTITY_REF_NODE
- */
-const XML_ENTITY_REF_NODE = UNKNOWN;
-/**
- * @var int
- * @cvalue XML_ENTITY_NODE
- */
-const XML_ENTITY_NODE = UNKNOWN;
-/**
- * @var int
- * @cvalue XML_PI_NODE
- */
-const XML_PI_NODE = UNKNOWN;
-/**
- * @var int
- * @cvalue XML_COMMENT_NODE
- */
-const XML_COMMENT_NODE = UNKNOWN;
-/**
- * @var int
- * @cvalue XML_DOCUMENT_NODE
- */
-const XML_DOCUMENT_NODE = UNKNOWN;
-/**
- * @var int
- * @cvalue XML_DOCUMENT_TYPE_NODE
- */
-const XML_DOCUMENT_TYPE_NODE = UNKNOWN;
-/**
- * @var int
- * @cvalue XML_DOCUMENT_FRAG_NODE
- */
-const XML_DOCUMENT_FRAG_NODE = UNKNOWN;
-/**
- * @var int
- * @cvalue XML_NOTATION_NODE
- */
-const XML_NOTATION_NODE = UNKNOWN;
-/**
- * @var int
- * @cvalue XML_HTML_DOCUMENT_NODE
- */
-const XML_HTML_DOCUMENT_NODE = UNKNOWN;
-/**
- * @var int
- * @cvalue XML_DTD_NODE
- */
-const XML_DTD_NODE = UNKNOWN;
-/**
- * @var int
- * @cvalue XML_ELEMENT_DECL
- */
-const XML_ELEMENT_DECL_NODE = UNKNOWN;
-/**
- * @var int
- * @cvalue XML_ATTRIBUTE_DECL
- */
-const XML_ATTRIBUTE_DECL_NODE = UNKNOWN;
-/**
- * @var int
- * @cvalue XML_ENTITY_DECL
- */
-const XML_ENTITY_DECL_NODE = UNKNOWN;
-/**
- * @var int
- * @cvalue XML_NAMESPACE_DECL
- */
-const XML_NAMESPACE_DECL_NODE = UNKNOWN;
+namespace
+{
+ /**
+ * @var int
+ * @cvalue XML_ELEMENT_NODE
+ */
+ const XML_ELEMENT_NODE = UNKNOWN;
+ /**
+ * @var int
+ * @cvalue XML_ATTRIBUTE_NODE
+ */
+ const XML_ATTRIBUTE_NODE = UNKNOWN;
+ /**
+ * @var int
+ * @cvalue XML_TEXT_NODE
+ */
+ const XML_TEXT_NODE = UNKNOWN;
+ /**
+ * @var int
+ * @cvalue XML_CDATA_SECTION_NODE
+ */
+ const XML_CDATA_SECTION_NODE = UNKNOWN;
+ /**
+ * @var int
+ * @cvalue XML_ENTITY_REF_NODE
+ */
+ const XML_ENTITY_REF_NODE = UNKNOWN;
+ /**
+ * @var int
+ * @cvalue XML_ENTITY_NODE
+ */
+ const XML_ENTITY_NODE = UNKNOWN;
+ /**
+ * @var int
+ * @cvalue XML_PI_NODE
+ */
+ const XML_PI_NODE = UNKNOWN;
+ /**
+ * @var int
+ * @cvalue XML_COMMENT_NODE
+ */
+ const XML_COMMENT_NODE = UNKNOWN;
+ /**
+ * @var int
+ * @cvalue XML_DOCUMENT_NODE
+ */
+ const XML_DOCUMENT_NODE = UNKNOWN;
+ /**
+ * @var int
+ * @cvalue XML_DOCUMENT_TYPE_NODE
+ */
+ const XML_DOCUMENT_TYPE_NODE = UNKNOWN;
+ /**
+ * @var int
+ * @cvalue XML_DOCUMENT_FRAG_NODE
+ */
+ const XML_DOCUMENT_FRAG_NODE = UNKNOWN;
+ /**
+ * @var int
+ * @cvalue XML_NOTATION_NODE
+ */
+ const XML_NOTATION_NODE = UNKNOWN;
+ /**
+ * @var int
+ * @cvalue XML_HTML_DOCUMENT_NODE
+ */
+ const XML_HTML_DOCUMENT_NODE = UNKNOWN;
+ /**
+ * @var int
+ * @cvalue XML_DTD_NODE
+ */
+ const XML_DTD_NODE = UNKNOWN;
+ /**
+ * @var int
+ * @cvalue XML_ELEMENT_DECL
+ */
+ const XML_ELEMENT_DECL_NODE = UNKNOWN;
+ /**
+ * @var int
+ * @cvalue XML_ATTRIBUTE_DECL
+ */
+ const XML_ATTRIBUTE_DECL_NODE = UNKNOWN;
+ /**
+ * @var int
+ * @cvalue XML_ENTITY_DECL
+ */
+ const XML_ENTITY_DECL_NODE = UNKNOWN;
+ /**
+ * @var int
+ * @cvalue XML_NAMESPACE_DECL
+ */
+ const XML_NAMESPACE_DECL_NODE = UNKNOWN;
#ifdef XML_GLOBAL_NAMESPACE
-/**
- * @var int
- * @cvalue XML_GLOBAL_NAMESPACE
- */
-const XML_GLOBAL_NAMESPACE = UNKNOWN;
+ /**
+ * @var int
+ * @cvalue XML_GLOBAL_NAMESPACE
+ */
+ const XML_GLOBAL_NAMESPACE = UNKNOWN;
#endif
-/**
- * @var int
- * @cvalue XML_LOCAL_NAMESPACE
- */
-const XML_LOCAL_NAMESPACE = UNKNOWN;
-/**
- * @var int
- * @cvalue XML_ATTRIBUTE_CDATA
- */
-const XML_ATTRIBUTE_CDATA = UNKNOWN;
-/**
- * @var int
- * @cvalue XML_ATTRIBUTE_ID
- */
-const XML_ATTRIBUTE_ID = UNKNOWN;
-/**
- * @var int
- * @cvalue XML_ATTRIBUTE_IDREF
- */
-const XML_ATTRIBUTE_IDREF = UNKNOWN;
-/**
- * @var int
- * @cvalue XML_ATTRIBUTE_IDREFS
- */
-const XML_ATTRIBUTE_IDREFS = UNKNOWN;
-/**
- * @var int
- * @cvalue XML_ATTRIBUTE_ENTITIES
- */
-const XML_ATTRIBUTE_ENTITY = UNKNOWN;
-/**
- * @var int
- * @cvalue XML_ATTRIBUTE_NMTOKEN
- */
-const XML_ATTRIBUTE_NMTOKEN = UNKNOWN;
-/**
- * @var int
- * @cvalue XML_ATTRIBUTE_NMTOKENS
- */
-const XML_ATTRIBUTE_NMTOKENS = UNKNOWN;
-/**
- * @var int
- * @cvalue XML_ATTRIBUTE_ENUMERATION
- */
-const XML_ATTRIBUTE_ENUMERATION = UNKNOWN;
-/**
- * @var int
- * @cvalue XML_ATTRIBUTE_NOTATION
- */
-const XML_ATTRIBUTE_NOTATION = UNKNOWN;
-
-/**
- * @var int
- * @cvalue PHP_ERR
- */
-const DOM_PHP_ERR = UNKNOWN;
-/**
- * @var int
- * @cvalue INDEX_SIZE_ERR
- */
-const DOM_INDEX_SIZE_ERR = UNKNOWN;
-/**
- * @var int
- * @cvalue DOMSTRING_SIZE_ERR
- */
-const DOMSTRING_SIZE_ERR = UNKNOWN;
-/**
- * @var int
- * @cvalue HIERARCHY_REQUEST_ERR
- */
-const DOM_HIERARCHY_REQUEST_ERR = UNKNOWN;
-/**
- * @var int
- * @cvalue WRONG_DOCUMENT_ERR
- */
-const DOM_WRONG_DOCUMENT_ERR = UNKNOWN;
-/**
- * @var int
- * @cvalue INVALID_CHARACTER_ERR
- */
-const DOM_INVALID_CHARACTER_ERR = UNKNOWN;
-/**
- * @var int
- * @cvalue NO_DATA_ALLOWED_ERR
- */
-const DOM_NO_DATA_ALLOWED_ERR = UNKNOWN;
-/**
- * @var int
- * @cvalue NO_MODIFICATION_ALLOWED_ERR
- */
-const DOM_NO_MODIFICATION_ALLOWED_ERR = UNKNOWN;
-/**
- * @var int
- * @cvalue NOT_FOUND_ERR
- */
-const DOM_NOT_FOUND_ERR = UNKNOWN;
-/**
- * @var int
- * @cvalue NOT_SUPPORTED_ERR
- */
-const DOM_NOT_SUPPORTED_ERR = UNKNOWN;
-/**
- * @var int
- * @cvalue INUSE_ATTRIBUTE_ERR
- */
-const DOM_INUSE_ATTRIBUTE_ERR = UNKNOWN;
-/**
- * @var int
- * @cvalue INVALID_STATE_ERR
- */
-const DOM_INVALID_STATE_ERR = UNKNOWN;
-/**
- * @var int
- * @cvalue SYNTAX_ERR
- */
-const DOM_SYNTAX_ERR = UNKNOWN;
-/**
- * @var int
- * @cvalue INVALID_MODIFICATION_ERR
- */
-const DOM_INVALID_MODIFICATION_ERR = UNKNOWN;
-/**
- * @var int
- * @cvalue NAMESPACE_ERR
- */
-const DOM_NAMESPACE_ERR = UNKNOWN;
-/**
- * @var int
- * @cvalue INVALID_ACCESS_ERR
- */
-const DOM_INVALID_ACCESS_ERR = UNKNOWN;
-/**
- * @var int
- * @cvalue VALIDATION_ERR
- */
-const DOM_VALIDATION_ERR = UNKNOWN;
-
-class DOMDocumentType extends DOMNode
-{
- /** @readonly */
- public string $name;
+ /**
+ * @var int
+ * @cvalue XML_LOCAL_NAMESPACE
+ */
+ const XML_LOCAL_NAMESPACE = UNKNOWN;
+ /**
+ * @var int
+ * @cvalue XML_ATTRIBUTE_CDATA
+ */
+ const XML_ATTRIBUTE_CDATA = UNKNOWN;
+ /**
+ * @var int
+ * @cvalue XML_ATTRIBUTE_ID
+ */
+ const XML_ATTRIBUTE_ID = UNKNOWN;
+ /**
+ * @var int
+ * @cvalue XML_ATTRIBUTE_IDREF
+ */
+ const XML_ATTRIBUTE_IDREF = UNKNOWN;
+ /**
+ * @var int
+ * @cvalue XML_ATTRIBUTE_IDREFS
+ */
+ const XML_ATTRIBUTE_IDREFS = UNKNOWN;
+ /**
+ * @var int
+ * @cvalue XML_ATTRIBUTE_ENTITIES
+ */
+ const XML_ATTRIBUTE_ENTITY = UNKNOWN;
+ /**
+ * @var int
+ * @cvalue XML_ATTRIBUTE_NMTOKEN
+ */
+ const XML_ATTRIBUTE_NMTOKEN = UNKNOWN;
+ /**
+ * @var int
+ * @cvalue XML_ATTRIBUTE_NMTOKENS
+ */
+ const XML_ATTRIBUTE_NMTOKENS = UNKNOWN;
+ /**
+ * @var int
+ * @cvalue XML_ATTRIBUTE_ENUMERATION
+ */
+ const XML_ATTRIBUTE_ENUMERATION = UNKNOWN;
+ /**
+ * @var int
+ * @cvalue XML_ATTRIBUTE_NOTATION
+ */
+ const XML_ATTRIBUTE_NOTATION = UNKNOWN;
- /** @readonly */
- public DOMNamedNodeMap $entities;
+ /**
+ * @var int
+ * @cvalue PHP_ERR
+ */
+ const DOM_PHP_ERR = UNKNOWN;
+ /**
+ * @var int
+ * @cvalue INDEX_SIZE_ERR
+ */
+ const DOM_INDEX_SIZE_ERR = UNKNOWN;
+ /**
+ * @var int
+ * @cvalue DOMSTRING_SIZE_ERR
+ */
+ const DOMSTRING_SIZE_ERR = UNKNOWN;
+ /**
+ * @var int
+ * @cvalue HIERARCHY_REQUEST_ERR
+ */
+ const DOM_HIERARCHY_REQUEST_ERR = UNKNOWN;
+ /**
+ * @var int
+ * @cvalue WRONG_DOCUMENT_ERR
+ */
+ const DOM_WRONG_DOCUMENT_ERR = UNKNOWN;
+ /**
+ * @var int
+ * @cvalue INVALID_CHARACTER_ERR
+ */
+ const DOM_INVALID_CHARACTER_ERR = UNKNOWN;
+ /**
+ * @var int
+ * @cvalue NO_DATA_ALLOWED_ERR
+ */
+ const DOM_NO_DATA_ALLOWED_ERR = UNKNOWN;
+ /**
+ * @var int
+ * @cvalue NO_MODIFICATION_ALLOWED_ERR
+ */
+ const DOM_NO_MODIFICATION_ALLOWED_ERR = UNKNOWN;
+ /**
+ * @var int
+ * @cvalue NOT_FOUND_ERR
+ */
+ const DOM_NOT_FOUND_ERR = UNKNOWN;
+ /**
+ * @var int
+ * @cvalue NOT_SUPPORTED_ERR
+ */
+ const DOM_NOT_SUPPORTED_ERR = UNKNOWN;
+ /**
+ * @var int
+ * @cvalue INUSE_ATTRIBUTE_ERR
+ */
+ const DOM_INUSE_ATTRIBUTE_ERR = UNKNOWN;
+ /**
+ * @var int
+ * @cvalue INVALID_STATE_ERR
+ */
+ const DOM_INVALID_STATE_ERR = UNKNOWN;
+ /**
+ * @var int
+ * @cvalue SYNTAX_ERR
+ */
+ const DOM_SYNTAX_ERR = UNKNOWN;
+ /**
+ * @var int
+ * @cvalue INVALID_MODIFICATION_ERR
+ */
+ const DOM_INVALID_MODIFICATION_ERR = UNKNOWN;
+ /**
+ * @var int
+ * @cvalue NAMESPACE_ERR
+ */
+ const DOM_NAMESPACE_ERR = UNKNOWN;
+ /**
+ * @var int
+ * @cvalue INVALID_ACCESS_ERR
+ */
+ const DOM_INVALID_ACCESS_ERR = UNKNOWN;
+ /**
+ * @var int
+ * @cvalue VALIDATION_ERR
+ */
+ const DOM_VALIDATION_ERR = UNKNOWN;
- /** @readonly */
- public DOMNamedNodeMap $notations;
+ class DOMDocumentType extends DOMNode
+ {
+ /** @readonly */
+ public string $name;
- /** @readonly */
- public string $publicId;
+ /** @readonly */
+ public DOMNamedNodeMap $entities;
- /** @readonly */
- public string $systemId;
+ /** @readonly */
+ public DOMNamedNodeMap $notations;
- /** @readonly */
- public ?string $internalSubset;
-}
+ /** @readonly */
+ public string $publicId;
-class DOMCdataSection extends DOMText
-{
- public function __construct(string $data) {}
-}
+ /** @readonly */
+ public string $systemId;
-class DOMComment extends DOMCharacterData
-{
- public function __construct(string $data = "") {}
-}
+ /** @readonly */
+ public ?string $internalSubset;
+ }
-interface DOMParentNode
-{
- /** @param DOMNode|string $nodes */
- public function append(...$nodes): void;
+ class DOMCdataSection extends DOMText
+ {
+ public function __construct(string $data) {}
+ }
- /** @param DOMNode|string $nodes */
- public function prepend(...$nodes): void;
+ class DOMComment extends DOMCharacterData
+ {
+ public function __construct(string $data = "") {}
+ }
- /** @param DOMNode|string $nodes */
- public function replaceChildren(...$nodes): void;
-}
+ interface DOMParentNode
+ {
+ /** @param DOMNode|string $nodes */
+ public function append(...$nodes): void;
-interface DOMChildNode
-{
- public function remove(): void;
+ /** @param DOMNode|string $nodes */
+ public function prepend(...$nodes): void;
- /** @param DOMNode|string $nodes */
- public function before(... $nodes): void;
+ /** @param DOMNode|string $nodes */
+ public function replaceChildren(...$nodes): void;
+ }
- /** @param DOMNode|string $nodes */
- public function after(...$nodes): void;
+ interface DOMChildNode
+ {
+ public function remove(): void;
- /** @param DOMNode|string $nodes */
- public function replaceWith(...$nodes): void;
-}
+ /** @param DOMNode|string $nodes */
+ public function before(... $nodes): void;
-class DOMNode
-{
- public const int DOCUMENT_POSITION_DISCONNECTED = 0x01;
- public const int DOCUMENT_POSITION_PRECEDING = 0x02;
- public const int DOCUMENT_POSITION_FOLLOWING = 0x04;
- public const int DOCUMENT_POSITION_CONTAINS = 0x08;
- public const int DOCUMENT_POSITION_CONTAINED_BY = 0x10;
- public const int DOCUMENT_POSITION_IMPLEMENTATION_SPECIFIC = 0x20;
+ /** @param DOMNode|string $nodes */
+ public function after(...$nodes): void;
- /** @readonly */
- public string $nodeName;
+ /** @param DOMNode|string $nodes */
+ public function replaceWith(...$nodes): void;
+ }
- public ?string $nodeValue;
+ class DOMNode
+ {
+ public const int DOCUMENT_POSITION_DISCONNECTED = 0x01;
+ public const int DOCUMENT_POSITION_PRECEDING = 0x02;
+ public const int DOCUMENT_POSITION_FOLLOWING = 0x04;
+ public const int DOCUMENT_POSITION_CONTAINS = 0x08;
+ public const int DOCUMENT_POSITION_CONTAINED_BY = 0x10;
+ public const int DOCUMENT_POSITION_IMPLEMENTATION_SPECIFIC = 0x20;
- /** @readonly */
- public int $nodeType;
+ /** @readonly */
+ public string $nodeName;
- /** @readonly */
- public ?DOMNode $parentNode;
+ public ?string $nodeValue;
- /** @readonly */
- public ?DOMElement $parentElement;
+ /** @readonly */
+ public int $nodeType;
- /** @readonly */
- public DOMNodeList $childNodes;
+ /** @readonly */
+ public ?DOMNode $parentNode;
- /** @readonly */
- public ?DOMNode $firstChild;
+ /** @readonly */
+ public ?DOMElement $parentElement;
- /** @readonly */
- public ?DOMNode $lastChild;
+ /** @readonly */
+ public DOMNodeList $childNodes;
- /** @readonly */
- public ?DOMNode $previousSibling;
+ /** @readonly */
+ public ?DOMNode $firstChild;
- /** @readonly */
- public ?DOMNode $nextSibling;
+ /** @readonly */
+ public ?DOMNode $lastChild;
- /** @readonly */
- public ?DOMNamedNodeMap $attributes;
+ /** @readonly */
+ public ?DOMNode $previousSibling;
- /** @readonly */
- public bool $isConnected;
+ /** @readonly */
+ public ?DOMNode $nextSibling;
- /** @readonly */
- public ?DOMDocument $ownerDocument;
+ /** @readonly */
+ public ?DOMNamedNodeMap $attributes;
- /** @readonly */
- public ?string $namespaceURI;
+ /** @readonly */
+ public bool $isConnected;
- public string $prefix;
+ /** @readonly */
+ public ?DOMDocument $ownerDocument;
- /** @readonly */
- public ?string $localName;
+ /** @readonly */
+ public ?string $namespaceURI;
- /** @readonly */
- public ?string $baseURI;
+ public string $prefix;
- public string $textContent;
+ /** @readonly */
+ public ?string $localName;
- public function __sleep(): array {}
+ /** @readonly */
+ public ?string $baseURI;
- public function __wakeup(): void {}
+ public string $textContent;
- /** @return DOMNode|false */
- public function appendChild(DOMNode $node) {}
+ /** @return DOMNode|false */
+ public function appendChild(DOMNode $node) {}
- /** @tentative-return-type */
- public function C14N(bool $exclusive = false, bool $withComments = false, ?array $xpath = null, ?array $nsPrefixes = null): string|false {}
+ /** @tentative-return-type */
+ public function C14N(bool $exclusive = false, bool $withComments = false, ?array $xpath = null, ?array $nsPrefixes = null): string|false {}
- /** @tentative-return-type */
- public function C14NFile(string $uri, bool $exclusive = false, bool $withComments = false, ?array $xpath = null, ?array $nsPrefixes = null): int|false {}
+ /** @tentative-return-type */
+ public function C14NFile(string $uri, bool $exclusive = false, bool $withComments = false, ?array $xpath = null, ?array $nsPrefixes = null): int|false {}
- /** @return DOMNode|false */
- public function cloneNode(bool $deep = false) {}
+ /** @return DOMNode|false */
+ public function cloneNode(bool $deep = false) {}
- /** @tentative-return-type */
- public function getLineNo(): int {}
+ /** @tentative-return-type */
+ public function getLineNo(): int {}
- /** @tentative-return-type */
- public function getNodePath(): ?string {}
+ /** @tentative-return-type */
+ public function getNodePath(): ?string {}
- /** @tentative-return-type */
- public function hasAttributes(): bool {}
+ /** @tentative-return-type */
+ public function hasAttributes(): bool {}
- /** @tentative-return-type */
- public function hasChildNodes(): bool {}
+ /** @tentative-return-type */
+ public function hasChildNodes(): bool {}
- /** @return DOMNode|false */
- public function insertBefore(DOMNode $node, ?DOMNode $child = null) {}
+ /** @return DOMNode|false */
+ public function insertBefore(DOMNode $node, ?DOMNode $child = null) {}
- /** @tentative-return-type */
- public function isDefaultNamespace(string $namespace): bool {}
+ /** @tentative-return-type */
+ public function isDefaultNamespace(string $namespace): bool {}
- /** @tentative-return-type */
- public function isSameNode(DOMNode $otherNode): bool {}
+ /** @tentative-return-type */
+ public function isSameNode(DOMNode $otherNode): bool {}
- public function isEqualNode(?DOMNode $otherNode): bool {}
+ public function isEqualNode(?DOMNode $otherNode): bool {}
- /** @tentative-return-type */
- public function isSupported(string $feature, string $version): bool {}
+ /** @tentative-return-type */
+ public function isSupported(string $feature, string $version): bool {}
- /** @tentative-return-type */
- public function lookupNamespaceURI(?string $prefix): ?string {}
+ /** @tentative-return-type */
+ public function lookupNamespaceURI(?string $prefix): ?string {}
- /** @tentative-return-type */
- public function lookupPrefix(string $namespace): ?string {}
+ /** @tentative-return-type */
+ public function lookupPrefix(string $namespace): ?string {}
- /** @tentative-return-type */
- public function normalize(): void {}
+ /** @tentative-return-type */
+ public function normalize(): void {}
- /** @return DOMNode|false */
- public function removeChild(DOMNode $child) {}
+ /** @return DOMNode|false */
+ public function removeChild(DOMNode $child) {}
- /** @return DOMNode|false */
- public function replaceChild(DOMNode $node, DOMNode $child) {}
+ /** @return DOMNode|false */
+ public function replaceChild(DOMNode $node, DOMNode $child) {}
- public function contains(DOMNode|DOMNameSpaceNode|null $other): bool {}
+ public function contains(DOMNode|DOMNameSpaceNode|null $other): bool {}
- public function getRootNode(?array $options = null): DOMNode {}
+ public function getRootNode(?array $options = null): DOMNode {}
- public function compareDocumentPosition(DOMNode $other): int {}
-}
+ public function compareDocumentPosition(DOMNode $other): int {}
-class DOMNameSpaceNode
-{
- /** @readonly */
- public string $nodeName;
+ public function __sleep(): array {}
+
+ public function __wakeup(): void {}
+ }
- /** @readonly */
- public ?string $nodeValue;
+ class DOMNameSpaceNode
+ {
+ /** @readonly */
+ public string $nodeName;
- /** @readonly */
- public int $nodeType;
+ /** @readonly */
+ public ?string $nodeValue;
- /** @readonly */
- public string $prefix;
+ /** @readonly */
+ public int $nodeType;
- /** @readonly */
- public ?string $localName;
+ /** @readonly */
+ public string $prefix;
- /** @readonly */
- public ?string $namespaceURI;
+ /** @readonly */
+ public ?string $localName;
- /** @readonly */
- public bool $isConnected;
+ /** @readonly */
+ public ?string $namespaceURI;
- /** @readonly */
- public ?DOMDocument $ownerDocument;
+ /** @readonly */
+ public bool $isConnected;
- /** @readonly */
- public ?DOMNode $parentNode;
+ /** @readonly */
+ public ?DOMDocument $ownerDocument;
- /** @readonly */
- public ?DOMElement $parentElement;
+ /** @readonly */
+ public ?DOMNode $parentNode;
- /** @implementation-alias DOMNode::__sleep */
- public function __sleep(): array {}
+ /** @readonly */
+ public ?DOMElement $parentElement;
- /** @implementation-alias DOMNode::__wakeup */
- public function __wakeup(): void {}
-}
+ /** @implementation-alias DOMNode::__sleep */
+ public function __sleep(): array {}
+
+ /** @implementation-alias DOMNode::__wakeup */
+ public function __wakeup(): void {}
+ }
-class DOMImplementation
-{
- /** @tentative-return-type */
- public function getFeature(string $feature, string $version): never {}
+ class DOMImplementation
+ {
+ /** @tentative-return-type */
+ public function getFeature(string $feature, string $version): never {}
- /** @tentative-return-type */
- public function hasFeature(string $feature, string $version): bool {}
+ /** @tentative-return-type */
+ public function hasFeature(string $feature, string $version): bool {}
- /** @return DOMDocumentType|false */
- public function createDocumentType(string $qualifiedName, string $publicId = "", string $systemId = "") {}
+ /** @return DOMDocumentType|false */
+ public function createDocumentType(string $qualifiedName, string $publicId = "", string $systemId = "") {}
- /** @return DOMDocument|false */
- public function createDocument(?string $namespace = null, string $qualifiedName = "", ?DOMDocumentType $doctype = null) {}
-}
+ /** @return DOMDocument|false */
+ public function createDocument(?string $namespace = null, string $qualifiedName = "", ?DOMDocumentType $doctype = null) {}
+ }
-class DOMDocumentFragment extends DOMNode implements DOMParentNode
-{
- /** @readonly */
- public ?DOMElement $firstElementChild;
+ class DOMDocumentFragment extends DOMNode implements DOMParentNode
+ {
+ /** @readonly */
+ public ?DOMElement $firstElementChild;
- /** @readonly */
- public ?DOMElement $lastElementChild;
+ /** @readonly */
+ public ?DOMElement $lastElementChild;
- /** @readonly */
- public int $childElementCount;
+ /** @readonly */
+ public int $childElementCount;
- public function __construct() {}
+ public function __construct() {}
- /** @tentative-return-type */
- public function appendXML(string $data): bool {}
+ /** @tentative-return-type */
+ public function appendXML(string $data): bool {}
- /**
- * @param DOMNode|string $nodes
- * @implementation-alias DOMElement::append
- */
- public function append(...$nodes): void {}
+ /**
+ * @param DOMNode|string $nodes
+ * @implementation-alias DOMElement::append
+ */
+ public function append(...$nodes): void {}
- /**
- * @param DOMNode|string $nodes
- * @implementation-alias DOMElement::prepend
- */
- public function prepend(...$nodes): void {}
+ /**
+ * @param DOMNode|string $nodes
+ * @implementation-alias DOMElement::prepend
+ */
+ public function prepend(...$nodes): void {}
- /**
- * @param DOMNode|string $nodes
- * @implementation-alias DOMDocument::replaceChildren
- */
- public function replaceChildren(...$nodes): void {}
-}
+ /**
+ * @param DOMNode|string $nodes
+ * @implementation-alias DOMDocument::replaceChildren
+ */
+ public function replaceChildren(...$nodes): void {}
+ }
-class DOMNodeList implements IteratorAggregate, Countable
-{
- /** @readonly */
- public int $length;
+ class DOMNodeList implements IteratorAggregate, Countable
+ {
+ /** @readonly */
+ public int $length;
- /** @tentative-return-type */
- public function count(): int {}
+ /** @tentative-return-type */
+ public function count(): int {}
- public function getIterator(): Iterator {}
+ public function getIterator(): Iterator {}
- /** @return DOMElement|DOMNode|DOMNameSpaceNode|null */
- public function item(int $index) {}
-}
+ /** @return DOMElement|DOMNode|DOMNameSpaceNode|null */
+ public function item(int $index) {}
+ }
-class DOMCharacterData extends DOMNode implements DOMChildNode
-{
- public string $data;
+ class DOMCharacterData extends DOMNode implements DOMChildNode
+ {
+ public string $data;
- /** @readonly */
- public int $length;
+ /** @readonly */
+ public int $length;
- /** @readonly */
- public ?DOMElement $previousElementSibling;
+ /** @readonly */
+ public ?DOMElement $previousElementSibling;
- /** @readonly */
- public ?DOMElement $nextElementSibling;
+ /** @readonly */
+ public ?DOMElement $nextElementSibling;
- /** @tentative-return-type */
- public function appendData(string $data): true {}
+ /** @tentative-return-type */
+ public function appendData(string $data): true {}
- /** @return string|false */
- public function substringData(int $offset, int $count) {}
+ /** @return string|false */
+ public function substringData(int $offset, int $count) {}
- /** @tentative-return-type */
- public function insertData(int $offset, string $data): bool {}
+ /** @tentative-return-type */
+ public function insertData(int $offset, string $data): bool {}
- /** @tentative-return-type */
- public function deleteData(int $offset, int $count): bool {}
+ /** @tentative-return-type */
+ public function deleteData(int $offset, int $count): bool {}
- /** @tentative-return-type */
- public function replaceData(int $offset, int $count, string $data): bool {}
+ /** @tentative-return-type */
+ public function replaceData(int $offset, int $count, string $data): bool {}
- /**
- * @param DOMNode|string $nodes
- * @implementation-alias DOMElement::replaceWith
- */
- public function replaceWith(...$nodes): void {}
+ /**
+ * @param DOMNode|string $nodes
+ * @implementation-alias DOMElement::replaceWith
+ */
+ public function replaceWith(...$nodes): void {}
- /** @implementation-alias DOMElement::remove */
- public function remove(): void {}
+ /** @implementation-alias DOMElement::remove */
+ public function remove(): void {}
- /**
- * @param DOMNode|string $nodes
- * @implementation-alias DOMElement::before
- */
- public function before(... $nodes): void {}
+ /**
+ * @param DOMNode|string $nodes
+ * @implementation-alias DOMElement::before
+ */
+ public function before(... $nodes): void {}
- /**
- * @param DOMNode|string $nodes
- * @implementation-alias DOMElement::after
- */
- public function after(...$nodes): void {}
-}
+ /**
+ * @param DOMNode|string $nodes
+ * @implementation-alias DOMElement::after
+ */
+ public function after(...$nodes): void {}
+ }
-class DOMAttr extends DOMNode
-{
- /** @readonly */
- public string $name;
+ class DOMAttr extends DOMNode
+ {
+ /** @readonly */
+ public string $name;
- /** @readonly */
- public bool $specified = true;
+ /** @readonly */
+ public bool $specified = true;
- public string $value;
+ public string $value;
- /** @readonly */
- public ?DOMElement $ownerElement;
+ /** @readonly */
+ public ?DOMElement $ownerElement;
- /** @readonly */
- public mixed $schemaTypeInfo = null;
+ /** @readonly */
+ public mixed $schemaTypeInfo = null;
- public function __construct(string $name, string $value = "") {}
+ public function __construct(string $name, string $value = "") {}
- /** @tentative-return-type */
- public function isId(): bool {}
-}
+ /** @tentative-return-type */
+ public function isId(): bool {}
+ }
-class DOMElement extends DOMNode implements DOMParentNode, DOMChildNode
-{
- /** @readonly */
- public string $tagName;
+ class DOMElement extends DOMNode implements DOMParentNode, DOMChildNode
+ {
+ /** @readonly */
+ public string $tagName;
- public string $className;
+ public string $className;
- public string $id;
+ public string $id;
- /** @readonly */
- public mixed $schemaTypeInfo = null;
+ /** @readonly */
+ public mixed $schemaTypeInfo = null;
- /** @readonly */
- public ?DOMElement $firstElementChild;
+ /** @readonly */
+ public ?DOMElement $firstElementChild;
- /** @readonly */
- public ?DOMElement $lastElementChild;
+ /** @readonly */
+ public ?DOMElement $lastElementChild;
- /** @readonly */
- public int $childElementCount;
+ /** @readonly */
+ public int $childElementCount;
- /** @readonly */
- public ?DOMElement $previousElementSibling;
+ /** @readonly */
+ public ?DOMElement $previousElementSibling;
- /** @readonly */
- public ?DOMElement $nextElementSibling;
+ /** @readonly */
+ public ?DOMElement $nextElementSibling;
- public function __construct(string $qualifiedName, ?string $value = null, string $namespace = "") {}
+ public function __construct(string $qualifiedName, ?string $value = null, string $namespace = "") {}
- /** @tentative-return-type */
- public function getAttribute(string $qualifiedName): string {}
+ /** @tentative-return-type */
+ public function getAttribute(string $qualifiedName): string {}
- public function getAttributeNames(): array {}
+ public function getAttributeNames(): array {}
- /** @tentative-return-type */
- public function getAttributeNS(?string $namespace, string $localName): string {}
+ /** @tentative-return-type */
+ public function getAttributeNS(?string $namespace, string $localName): string {}
- /** @return DOMAttr|DOMNameSpaceNode|false */
- public function getAttributeNode(string $qualifiedName) {}
+ /** @return DOMAttr|DOMNameSpaceNode|false */
+ public function getAttributeNode(string $qualifiedName) {}
- /** @return DOMAttr|DOMNameSpaceNode|null */
- public function getAttributeNodeNS(?string $namespace, string $localName) {}
+ /** @return DOMAttr|DOMNameSpaceNode|null */
+ public function getAttributeNodeNS(?string $namespace, string $localName) {}
- /** @tentative-return-type */
- public function getElementsByTagName(string $qualifiedName): DOMNodeList {}
+ /** @tentative-return-type */
+ public function getElementsByTagName(string $qualifiedName): DOMNodeList {}
- /** @tentative-return-type */
- public function getElementsByTagNameNS(?string $namespace, string $localName): DOMNodeList {}
+ /** @tentative-return-type */
+ public function getElementsByTagNameNS(?string $namespace, string $localName): DOMNodeList {}
- /** @tentative-return-type */
- public function hasAttribute(string $qualifiedName): bool {}
+ /** @tentative-return-type */
+ public function hasAttribute(string $qualifiedName): bool {}
- /** @tentative-return-type */
- public function hasAttributeNS(?string $namespace, string $localName): bool {}
+ /** @tentative-return-type */
+ public function hasAttributeNS(?string $namespace, string $localName): bool {}
- /** @tentative-return-type */
- public function removeAttribute(string $qualifiedName): bool {}
+ /** @tentative-return-type */
+ public function removeAttribute(string $qualifiedName): bool {}
- /** @tentative-return-type */
- public function removeAttributeNS(?string $namespace, string $localName): void {}
+ /** @tentative-return-type */
+ public function removeAttributeNS(?string $namespace, string $localName): void {}
- /** @return DOMAttr|false */
- public function removeAttributeNode(DOMAttr $attr) {}
+ /** @return DOMAttr|false */
+ public function removeAttributeNode(DOMAttr $attr) {}
- /** @return DOMAttr|bool */
- public function setAttribute(string $qualifiedName, string $value) {} // TODO return type shouldn't depend on the call scope
+ /** @return DOMAttr|bool */
+ public function setAttribute(string $qualifiedName, string $value) {} // TODO return type shouldn't depend on the call scope
- /** @tentative-return-type */
- public function setAttributeNS(?string $namespace, string $qualifiedName, string $value): void {}
+ /** @tentative-return-type */
+ public function setAttributeNS(?string $namespace, string $qualifiedName, string $value): void {}
- /** @return DOMAttr|null|false */
- public function setAttributeNode(DOMAttr $attr) {}
+ /** @return DOMAttr|null|false */
+ public function setAttributeNode(DOMAttr $attr) {}
- /** @return DOMAttr|null|false */
- public function setAttributeNodeNS(DOMAttr $attr) {}
+ /** @return DOMAttr|null|false */
+ public function setAttributeNodeNS(DOMAttr $attr) {}
- /** @tentative-return-type */
- public function setIdAttribute(string $qualifiedName, bool $isId): void {}
+ /** @tentative-return-type */
+ public function setIdAttribute(string $qualifiedName, bool $isId): void {}
- /** @tentative-return-type */
- public function setIdAttributeNS(string $namespace, string $qualifiedName, bool $isId): void {}
+ /** @tentative-return-type */
+ public function setIdAttributeNS(string $namespace, string $qualifiedName, bool $isId): void {}
- /** @tentative-return-type */
- public function setIdAttributeNode(DOMAttr $attr, bool $isId): void {}
+ /** @tentative-return-type */
+ public function setIdAttributeNode(DOMAttr $attr, bool $isId): void {}
- public function toggleAttribute(string $qualifiedName, ?bool $force = null): bool {}
+ public function toggleAttribute(string $qualifiedName, ?bool $force = null): bool {}
- public function remove(): void {}
+ public function remove(): void {}
- /** @param DOMNode|string $nodes */
- public function before(... $nodes): void {}
+ /** @param DOMNode|string $nodes */
+ public function before(... $nodes): void {}
- /** @param DOMNode|string $nodes */
- public function after(...$nodes): void {}
+ /** @param DOMNode|string $nodes */
+ public function after(...$nodes): void {}
- /** @param DOMNode|string $nodes */
- public function replaceWith(...$nodes): void {}
+ /** @param DOMNode|string $nodes */
+ public function replaceWith(...$nodes): void {}
- /** @param DOMNode|string $nodes */
- public function append(...$nodes): void {}
+ /** @param DOMNode|string $nodes */
+ public function append(...$nodes): void {}
- /** @param DOMNode|string $nodes */
- public function prepend(...$nodes): void {}
+ /** @param DOMNode|string $nodes */
+ public function prepend(...$nodes): void {}
- /** @param DOMNode|string $nodes */
- public function replaceChildren(...$nodes): void {}
+ /** @param DOMNode|string $nodes */
+ public function replaceChildren(...$nodes): void {}
- public function insertAdjacentElement(string $where, DOMElement $element): ?DOMElement {}
+ public function insertAdjacentElement(string $where, DOMElement $element): ?DOMElement {}
- public function insertAdjacentText(string $where, string $data): void {}
-}
+ public function insertAdjacentText(string $where, string $data): void {}
+ }
-class DOMDocument extends DOMNode implements DOMParentNode
-{
- /** @readonly */
- public ?DOMDocumentType $doctype;
+ class DOMDocument extends DOMNode implements DOMParentNode
+ {
+ /** @readonly */
+ public ?DOMDocumentType $doctype;
- /** @readonly */
- public DOMImplementation $implementation;
+ /** @readonly */
+ public DOMImplementation $implementation;
- /** @readonly */
- public ?DOMElement $documentElement;
+ /** @readonly */
+ public ?DOMElement $documentElement;
- /**
- * @readonly
- * @deprecated
- */
- public ?string $actualEncoding;
+ /**
+ * @readonly
+ * @deprecated
+ */
+ public ?string $actualEncoding;
- public ?string $encoding;
+ public ?string $encoding;
- /** @readonly */
- public ?string $xmlEncoding;
+ /** @readonly */
+ public ?string $xmlEncoding;
- public bool $standalone;
+ public bool $standalone;
- public bool $xmlStandalone;
+ public bool $xmlStandalone;
- public ?string $version;
+ public ?string $version;
- public ?string $xmlVersion;
+ public ?string $xmlVersion;
- public bool $strictErrorChecking;
+ public bool $strictErrorChecking;
- public ?string $documentURI;
+ public ?string $documentURI;
- /**
- * @readonly
- * @deprecated
- */
- public mixed $config;
+ /**
+ * @readonly
+ * @deprecated
+ */
+ public mixed $config;
- public bool $formatOutput;
+ public bool $formatOutput;
- public bool $validateOnParse;
+ public bool $validateOnParse;
- public bool $resolveExternals;
+ public bool $resolveExternals;
- public bool $preserveWhiteSpace;
+ public bool $preserveWhiteSpace;
- public bool $recover;
+ public bool $recover;
- public bool $substituteEntities;
+ public bool $substituteEntities;
- /** @readonly */
- public ?DOMElement $firstElementChild;
+ /** @readonly */
+ public ?DOMElement $firstElementChild;
- /** @readonly */
- public ?DOMElement $lastElementChild;
+ /** @readonly */
+ public ?DOMElement $lastElementChild;
- /** @readonly */
- public int $childElementCount;
+ /** @readonly */
+ public int $childElementCount;
- public function __construct(string $version = "1.0", string $encoding = "") {}
+ public function __construct(string $version = "1.0", string $encoding = "") {}
- /** @return DOMAttr|false */
- public function createAttribute(string $localName) {}
+ /** @return DOMAttr|false */
+ public function createAttribute(string $localName) {}
- /** @return DOMAttr|false */
- public function createAttributeNS(?string $namespace, string $qualifiedName) {}
+ /** @return DOMAttr|false */
+ public function createAttributeNS(?string $namespace, string $qualifiedName) {}
- /** @return DOMCdataSection|false */
- public function createCDATASection(string $data) {}
+ /** @return DOMCdataSection|false */
+ public function createCDATASection(string $data) {}
- /** @tentative-return-type */
- public function createComment(string $data): DOMComment {}
+ /** @tentative-return-type */
+ public function createComment(string $data): DOMComment {}
- /** @tentative-return-type */
- public function createDocumentFragment(): DOMDocumentFragment {}
+ /** @tentative-return-type */
+ public function createDocumentFragment(): DOMDocumentFragment {}
- /** @return DOMElement|false */
- public function createElement(string $localName, string $value = "") {}
+ /** @return DOMElement|false */
+ public function createElement(string $localName, string $value = "") {}
- /** @return DOMElement|false */
- public function createElementNS(?string $namespace, string $qualifiedName, string $value = "") {}
+ /** @return DOMElement|false */
+ public function createElementNS(?string $namespace, string $qualifiedName, string $value = "") {}
- /** @return DOMEntityReference|false */
- public function createEntityReference(string $name) {}
+ /** @return DOMEntityReference|false */
+ public function createEntityReference(string $name) {}
- /** @return DOMProcessingInstruction|false */
- public function createProcessingInstruction(string $target, string $data = "") {}
+ /** @return DOMProcessingInstruction|false */
+ public function createProcessingInstruction(string $target, string $data = "") {}
- /** @tentative-return-type */
- public function createTextNode(string $data): DOMText {}
+ /** @tentative-return-type */
+ public function createTextNode(string $data): DOMText {}
- /** @tentative-return-type */
- public function getElementById(string $elementId): ?DOMElement {}
+ /** @tentative-return-type */
+ public function getElementById(string $elementId): ?DOMElement {}
- /** @tentative-return-type */
- public function getElementsByTagName(string $qualifiedName): DOMNodeList {}
+ /** @tentative-return-type */
+ public function getElementsByTagName(string $qualifiedName): DOMNodeList {}
- /** @tentative-return-type */
- public function getElementsByTagNameNS(?string $namespace, string $localName): DOMNodeList {}
+ /** @tentative-return-type */
+ public function getElementsByTagNameNS(?string $namespace, string $localName): DOMNodeList {}
- /** @return DOMNode|false */
- public function importNode(DOMNode $node, bool $deep = false) {}
+ /** @return DOMNode|false */
+ public function importNode(DOMNode $node, bool $deep = false) {}
- /** @tentative-return-type */
- public function load(string $filename, int $options = 0): bool {}
+ /** @tentative-return-type */
+ public function load(string $filename, int $options = 0): bool {}
- /** @tentative-return-type */
- public function loadXML(string $source, int $options = 0): bool {}
+ /** @tentative-return-type */
+ public function loadXML(string $source, int $options = 0): bool {}
- /** @tentative-return-type */
- public function normalizeDocument(): void {}
+ /** @tentative-return-type */
+ public function normalizeDocument(): void {}
- /** @tentative-return-type */
- public function registerNodeClass(string $baseClass, ?string $extendedClass): bool {}
+ /** @tentative-return-type */
+ public function registerNodeClass(string $baseClass, ?string $extendedClass): bool {}
- /** @tentative-return-type */
- public function save(string $filename, int $options = 0): int|false {}
+ /** @tentative-return-type */
+ public function save(string $filename, int $options = 0): int|false {}
#ifdef LIBXML_HTML_ENABLED
- /** @tentative-return-type */
- public function loadHTML(string $source, int $options = 0): bool {}
+ /** @tentative-return-type */
+ public function loadHTML(string $source, int $options = 0): bool {}
- /** @tentative-return-type */
- public function loadHTMLFile(string $filename, int $options = 0): bool {}
+ /** @tentative-return-type */
+ public function loadHTMLFile(string $filename, int $options = 0): bool {}
- /** @tentative-return-type */
- public function saveHTML(?DOMNode $node = null): string|false {}
+ /** @tentative-return-type */
+ public function saveHTML(?DOMNode $node = null): string|false {}
- /** @tentative-return-type */
- public function saveHTMLFile(string $filename): int|false {}
+ /** @tentative-return-type */
+ public function saveHTMLFile(string $filename): int|false {}
#endif
- /** @tentative-return-type */
- public function saveXML(?DOMNode $node = null, int $options = 0): string|false {}
+ /** @tentative-return-type */
+ public function saveXML(?DOMNode $node = null, int $options = 0): string|false {}
-#ifdef LIBXML_SCHEMAS_ENABLED
- /** @tentative-return-type */
- public function schemaValidate(string $filename, int $flags = 0): bool {}
+ #ifdef LIBXML_SCHEMAS_ENABLED
+ /** @tentative-return-type */
+ public function schemaValidate(string $filename, int $flags = 0): bool {}
- /** @tentative-return-type */
- public function schemaValidateSource(string $source, int $flags = 0): bool {}
+ /** @tentative-return-type */
+ public function schemaValidateSource(string $source, int $flags = 0): bool {}
- /** @tentative-return-type */
- public function relaxNGValidate(string $filename): bool {}
+ /** @tentative-return-type */
+ public function relaxNGValidate(string $filename): bool {}
- /** @tentative-return-type */
- public function relaxNGValidateSource(string $source): bool {}
-#endif
-
- /** @tentative-return-type */
- public function validate(): bool {}
+ /** @tentative-return-type */
+ public function relaxNGValidateSource(string $source): bool {}
+ #endif
- /** @tentative-return-type */
- public function xinclude(int $options = 0): int|false {}
+ /** @tentative-return-type */
+ public function validate(): bool {}
+
+ /** @tentative-return-type */
+ public function xinclude(int $options = 0): int|false {}
+
+ /** @tentative-return-type */
+ public function adoptNode(DOMNode $node): DOMNode|false {}
- /** @tentative-return-type */
- public function adoptNode(DOMNode $node): DOMNode|false {}
+ /**
+ * @param DOMNode|string $nodes
+ * @implementation-alias DOMElement::append
+ */
+ public function append(...$nodes): void {}
+
+ /**
+ * @param DOMNode|string $nodes
+ * @implementation-alias DOMElement::prepend
+ */
+ public function prepend(...$nodes): void {}
+
+ /** @param DOMNode|string $nodes */
+ public function replaceChildren(...$nodes): void {}
+ }
- /**
- * @param DOMNode|string $nodes
- * @implementation-alias DOMElement::append
- */
- public function append(...$nodes): void {}
-
- /**
- * @param DOMNode|string $nodes
- * @implementation-alias DOMElement::prepend
- */
- public function prepend(...$nodes): void {}
+ final class DOMException extends Exception
+ {
+ /**
+ * Intentionally left untyped for BC reasons
+ * @var int
+ */
+ public $code = 0; // TODO add proper type (i.e. int|string)
+ }
- /** @param DOMNode|string $nodes */
- public function replaceChildren(...$nodes): void {}
-}
+ class DOMText extends DOMCharacterData
+ {
+ /** @readonly */
+ public string $wholeText;
-final class DOMException extends Exception
-{
- /**
- * Intentionally left untyped for BC reasons
- * @var int
- */
- public $code = 0; // TODO add proper type (i.e. int|string)
-}
+ public function __construct(string $data = "") {}
-class DOMText extends DOMCharacterData
-{
- /** @readonly */
- public string $wholeText;
+ /** @tentative-return-type */
+ public function isWhitespaceInElementContent(): bool {}
- public function __construct(string $data = "") {}
+ /**
+ * @tentative-return-type
+ * @alias DOMText::isWhitespaceInElementContent
+ */
+ public function isElementContentWhitespace(): bool {}
- /** @tentative-return-type */
- public function isWhitespaceInElementContent(): bool {}
+ /** @return DOMText|false */
+ public function splitText(int $offset) {}
+ }
- /**
- * @tentative-return-type
- * @alias DOMText::isWhitespaceInElementContent
- */
- public function isElementContentWhitespace(): bool {}
+ class DOMNamedNodeMap implements IteratorAggregate, Countable
+ {
+ /** @readonly */
+ public int $length;
- /** @return DOMText|false */
- public function splitText(int $offset) {}
-}
+ /** @tentative-return-type */
+ public function getNamedItem(string $qualifiedName): ?DOMNode {} // TODO DOM spec returns DOMAttr
-class DOMNamedNodeMap implements IteratorAggregate, Countable
-{
- /** @readonly */
- public int $length;
+ /** @tentative-return-type */
+ public function getNamedItemNS(?string $namespace, string $localName): ?DOMNode {} // TODO DOM spec returns DOMAttr
- /** @tentative-return-type */
- public function getNamedItem(string $qualifiedName): ?DOMNode {} // TODO DOM spec returns DOMAttr
+ /** @tentative-return-type */
+ public function item(int $index): ?DOMNode {} // TODO DOM spec returns DOMAttr
- /** @tentative-return-type */
- public function getNamedItemNS(?string $namespace, string $localName): ?DOMNode {} // TODO DOM spec returns DOMAttr
+ /** @tentative-return-type */
+ public function count(): int {}
- /** @tentative-return-type */
- public function item(int $index): ?DOMNode {} // TODO DOM spec returns DOMAttr
+ public function getIterator(): Iterator {}
+ }
- /** @tentative-return-type */
- public function count(): int {}
+ class DOMEntity extends DOMNode
+ {
+ /** @readonly */
+ public ?string $publicId;
- public function getIterator(): Iterator {}
-}
+ /** @readonly */
+ public ?string $systemId;
-class DOMEntity extends DOMNode
-{
- /** @readonly */
- public ?string $publicId;
+ /** @readonly */
+ public ?string $notationName;
+
+ /**
+ * @readonly
+ * @deprecated
+ */
+ public ?string $actualEncoding = null;
+
+ /**
+ * @readonly
+ * @deprecated
+ */
+ public ?string $encoding = null;
+
+ /**
+ * @readonly
+ * @deprecated
+ */
+ public ?string $version = null;
+ }
+
+ class DOMEntityReference extends DOMNode
+ {
+ public function __construct(string $name) {}
+ }
+
+ class DOMNotation extends DOMNode
+ {
+ /** @readonly */
+ public string $publicId;
+
+ /** @readonly */
+ public string $systemId;
+ }
+
+ class DOMProcessingInstruction extends DOMNode
+ {
+ /** @readonly */
+ public string $target;
+
+ public string $data;
+
+ public function __construct(string $name, string $value = "") {}
+ }
- /** @readonly */
- public ?string $systemId;
+#ifdef LIBXML_XPATH_ENABLED
+ /** @not-serializable */
+ class DOMXPath
+ {
+ /** @readonly */
+ public DOMDocument $document;
- /** @readonly */
- public ?string $notationName;
+ public bool $registerNodeNamespaces;
- /**
- * @readonly
- * @deprecated
- */
- public ?string $actualEncoding = null;
+ public function __construct(DOMDocument $document, bool $registerNodeNS = true) {}
- /**
- * @readonly
- * @deprecated
- */
- public ?string $encoding = null;
+ /** @tentative-return-type */
+ public function evaluate(string $expression, ?DOMNode $contextNode = null, bool $registerNodeNS = true): mixed {}
- /**
- * @readonly
- * @deprecated
- */
- public ?string $version = null;
-}
+ /** @tentative-return-type */
+ public function query(string $expression, ?DOMNode $contextNode = null, bool $registerNodeNS = true): mixed {}
-class DOMEntityReference extends DOMNode
-{
- public function __construct(string $name) {}
-}
+ /** @tentative-return-type */
+ public function registerNamespace(string $prefix, string $namespace): bool {}
-class DOMNotation extends DOMNode
-{
- /** @readonly */
- public string $publicId;
+ /** @tentative-return-type */
+ public function registerPhpFunctions(string|array|null $restrict = null): void {}
+ }
+#endif
- /** @readonly */
- public string $systemId;
+ function dom_import_simplexml(object $node): DOMElement {}
}
-class DOMProcessingInstruction extends DOMNode
+namespace DOM
{
- /** @readonly */
- public string $target;
-
- public string $data;
-
- public function __construct(string $name, string $value = "") {}
-}
+ /**
+ * @var int
+ * @cvalue DOM_HTML_NO_DEFAULT_NS
+ */
+ const HTML_NO_DEFAULT_NS = UNKNOWN;
-#ifdef LIBXML_XPATH_ENABLED
-/** @not-serializable */
-class DOMXPath
-{
- /** @readonly */
- public DOMDocument $document;
+ class HTML5Document extends \DOMDocument
+ {
+ public function __construct(string $xmlVersion = "1.0", string $encoding = "") {}
- public bool $registerNodeNamespaces;
+ public function load(string $filename, int $options = 0): bool {}
- public function __construct(DOMDocument $document, bool $registerNodeNS = true) {}
+ public function loadXML(string $source, int $options = 0): bool {}
- /** @tentative-return-type */
- public function evaluate(string $expression, ?DOMNode $contextNode = null, bool $registerNodeNS = true): mixed {}
+ public function loadHTML(string $source, int $options = 0): bool {}
- /** @tentative-return-type */
- public function query(string $expression, ?DOMNode $contextNode = null, bool $registerNodeNS = true): mixed {}
+ public function loadHTMLFile(string $filename, int $options = 0): bool {}
- /** @tentative-return-type */
- public function registerNamespace(string $prefix, string $namespace): bool {}
+ public function saveHTML(?\DOMNode $node = null): string|false {}
- /** @tentative-return-type */
- public function registerPhpFunctions(string|array|null $restrict = null): void {}
+ public function saveHTMLFile(string $filename): int|false {}
+ }
}
-#endif
-
-function dom_import_simplexml(object $node): DOMElement {}
diff --git a/ext/dom/php_dom_arginfo.h b/ext/dom/php_dom_arginfo.h
index 694275e5e728f..cffaa09deb8a1 100644
--- a/ext/dom/php_dom_arginfo.h
+++ b/ext/dom/php_dom_arginfo.h
@@ -1,5 +1,5 @@
/* This is a generated file, edit the .stub.php file instead.
- * Stub hash: 134f9c3e73943f3f6815fa0e3a6784fdc9e70600 */
+ * Stub hash: 4d2c0e49b4a7a9d6f99669a2d8f8bf8b646b80c9 */
ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(arginfo_dom_import_simplexml, 0, 1, DOMElement, 0)
ZEND_ARG_TYPE_INFO(0, node, IS_OBJECT, 0)
@@ -30,11 +30,6 @@ ZEND_END_ARG_INFO()
#define arginfo_class_DOMChildNode_replaceWith arginfo_class_DOMParentNode_append
-ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_class_DOMNode___sleep, 0, 0, IS_ARRAY, 0)
-ZEND_END_ARG_INFO()
-
-#define arginfo_class_DOMNode___wakeup arginfo_class_DOMChildNode_remove
-
ZEND_BEGIN_ARG_INFO_EX(arginfo_class_DOMNode_appendChild, 0, 0, 1)
ZEND_ARG_OBJ_INFO(0, node, DOMNode, 0)
ZEND_END_ARG_INFO()
@@ -123,6 +118,11 @@ ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_class_DOMNode_compareDocumentPos
ZEND_ARG_OBJ_INFO(0, other, DOMNode, 0)
ZEND_END_ARG_INFO()
+ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_class_DOMNode___sleep, 0, 0, IS_ARRAY, 0)
+ZEND_END_ARG_INFO()
+
+#define arginfo_class_DOMNode___wakeup arginfo_class_DOMChildNode_remove
+
#define arginfo_class_DOMNameSpaceNode___sleep arginfo_class_DOMNode___sleep
#define arginfo_class_DOMNameSpaceNode___wakeup arginfo_class_DOMChildNode_remove
@@ -541,12 +541,37 @@ ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_TYPE_INFO_EX(arginfo_class_DOMXPath_registe
ZEND_END_ARG_INFO()
#endif
+ZEND_BEGIN_ARG_INFO_EX(arginfo_class_DOM_HTML5Document___construct, 0, 0, 0)
+ ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, xmlVersion, IS_STRING, 0, "\"1.0\"")
+ ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, encoding, IS_STRING, 0, "\"\"")
+ZEND_END_ARG_INFO()
+
+ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_class_DOM_HTML5Document_load, 0, 1, _IS_BOOL, 0)
+ ZEND_ARG_TYPE_INFO(0, filename, IS_STRING, 0)
+ ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, options, IS_LONG, 0, "0")
+ZEND_END_ARG_INFO()
+
+ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_class_DOM_HTML5Document_loadXML, 0, 1, _IS_BOOL, 0)
+ ZEND_ARG_TYPE_INFO(0, source, IS_STRING, 0)
+ ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, options, IS_LONG, 0, "0")
+ZEND_END_ARG_INFO()
+
+#define arginfo_class_DOM_HTML5Document_loadHTML arginfo_class_DOM_HTML5Document_loadXML
+
+#define arginfo_class_DOM_HTML5Document_loadHTMLFile arginfo_class_DOM_HTML5Document_load
+
+ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_class_DOM_HTML5Document_saveHTML, 0, 0, MAY_BE_STRING|MAY_BE_FALSE)
+ ZEND_ARG_OBJ_INFO_WITH_DEFAULT_VALUE(0, node, DOMNode, 1, "null")
+ZEND_END_ARG_INFO()
+
+ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_class_DOM_HTML5Document_saveHTMLFile, 0, 1, MAY_BE_LONG|MAY_BE_FALSE)
+ ZEND_ARG_TYPE_INFO(0, filename, IS_STRING, 0)
+ZEND_END_ARG_INFO()
+
ZEND_FUNCTION(dom_import_simplexml);
ZEND_METHOD(DOMCdataSection, __construct);
ZEND_METHOD(DOMComment, __construct);
-ZEND_METHOD(DOMNode, __sleep);
-ZEND_METHOD(DOMNode, __wakeup);
ZEND_METHOD(DOMNode, appendChild);
ZEND_METHOD(DOMNode, C14N);
ZEND_METHOD(DOMNode, C14NFile);
@@ -568,6 +593,8 @@ ZEND_METHOD(DOMNode, replaceChild);
ZEND_METHOD(DOMNode, contains);
ZEND_METHOD(DOMNode, getRootNode);
ZEND_METHOD(DOMNode, compareDocumentPosition);
+ZEND_METHOD(DOMNode, __sleep);
+ZEND_METHOD(DOMNode, __wakeup);
ZEND_METHOD(DOMImplementation, getFeature);
ZEND_METHOD(DOMImplementation, hasFeature);
ZEND_METHOD(DOMImplementation, createDocumentType);
@@ -688,6 +715,13 @@ ZEND_METHOD(DOMXPath, registerNamespace);
#if defined(LIBXML_XPATH_ENABLED)
ZEND_METHOD(DOMXPath, registerPhpFunctions);
#endif
+ZEND_METHOD(DOM_HTML5Document, __construct);
+ZEND_METHOD(DOM_HTML5Document, load);
+ZEND_METHOD(DOM_HTML5Document, loadXML);
+ZEND_METHOD(DOM_HTML5Document, loadHTML);
+ZEND_METHOD(DOM_HTML5Document, loadHTMLFile);
+ZEND_METHOD(DOM_HTML5Document, saveHTML);
+ZEND_METHOD(DOM_HTML5Document, saveHTMLFile);
static const zend_function_entry ext_functions[] = {
@@ -731,8 +765,6 @@ static const zend_function_entry class_DOMChildNode_methods[] = {
static const zend_function_entry class_DOMNode_methods[] = {
- ZEND_ME(DOMNode, __sleep, arginfo_class_DOMNode___sleep, ZEND_ACC_PUBLIC)
- ZEND_ME(DOMNode, __wakeup, arginfo_class_DOMNode___wakeup, ZEND_ACC_PUBLIC)
ZEND_ME(DOMNode, appendChild, arginfo_class_DOMNode_appendChild, ZEND_ACC_PUBLIC)
ZEND_ME(DOMNode, C14N, arginfo_class_DOMNode_C14N, ZEND_ACC_PUBLIC)
ZEND_ME(DOMNode, C14NFile, arginfo_class_DOMNode_C14NFile, ZEND_ACC_PUBLIC)
@@ -754,6 +786,8 @@ static const zend_function_entry class_DOMNode_methods[] = {
ZEND_ME(DOMNode, contains, arginfo_class_DOMNode_contains, ZEND_ACC_PUBLIC)
ZEND_ME(DOMNode, getRootNode, arginfo_class_DOMNode_getRootNode, ZEND_ACC_PUBLIC)
ZEND_ME(DOMNode, compareDocumentPosition, arginfo_class_DOMNode_compareDocumentPosition, ZEND_ACC_PUBLIC)
+ ZEND_ME(DOMNode, __sleep, arginfo_class_DOMNode___sleep, ZEND_ACC_PUBLIC)
+ ZEND_ME(DOMNode, __wakeup, arginfo_class_DOMNode___wakeup, ZEND_ACC_PUBLIC)
ZEND_FE_END
};
@@ -961,6 +995,18 @@ static const zend_function_entry class_DOMXPath_methods[] = {
};
#endif
+
+static const zend_function_entry class_DOM_HTML5Document_methods[] = {
+ ZEND_ME(DOM_HTML5Document, __construct, arginfo_class_DOM_HTML5Document___construct, ZEND_ACC_PUBLIC)
+ ZEND_ME(DOM_HTML5Document, load, arginfo_class_DOM_HTML5Document_load, ZEND_ACC_PUBLIC)
+ ZEND_ME(DOM_HTML5Document, loadXML, arginfo_class_DOM_HTML5Document_loadXML, ZEND_ACC_PUBLIC)
+ ZEND_ME(DOM_HTML5Document, loadHTML, arginfo_class_DOM_HTML5Document_loadHTML, ZEND_ACC_PUBLIC)
+ ZEND_ME(DOM_HTML5Document, loadHTMLFile, arginfo_class_DOM_HTML5Document_loadHTMLFile, ZEND_ACC_PUBLIC)
+ ZEND_ME(DOM_HTML5Document, saveHTML, arginfo_class_DOM_HTML5Document_saveHTML, ZEND_ACC_PUBLIC)
+ ZEND_ME(DOM_HTML5Document, saveHTMLFile, arginfo_class_DOM_HTML5Document_saveHTMLFile, ZEND_ACC_PUBLIC)
+ ZEND_FE_END
+};
+
static void register_php_dom_symbols(int module_number)
{
REGISTER_LONG_CONSTANT("XML_ELEMENT_NODE", XML_ELEMENT_NODE, CONST_PERSISTENT);
@@ -1011,6 +1057,7 @@ static void register_php_dom_symbols(int module_number)
REGISTER_LONG_CONSTANT("DOM_NAMESPACE_ERR", NAMESPACE_ERR, CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("DOM_INVALID_ACCESS_ERR", INVALID_ACCESS_ERR, CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("DOM_VALIDATION_ERR", VALIDATION_ERR, CONST_PERSISTENT);
+ REGISTER_LONG_CONSTANT("DOM\\HTML_NO_DEFAULT_NS", DOM_HTML_NO_DEFAULT_NS, CONST_PERSISTENT);
}
static zend_class_entry *register_class_DOMDocumentType(zend_class_entry *class_entry_DOMNode)
@@ -1865,3 +1912,13 @@ static zend_class_entry *register_class_DOMXPath(void)
return class_entry;
}
#endif
+
+static zend_class_entry *register_class_DOM_HTML5Document(zend_class_entry *class_entry_DOMDocument)
+{
+ zend_class_entry ce, *class_entry;
+
+ INIT_NS_CLASS_ENTRY(ce, "DOM", "HTML5Document", class_DOM_HTML5Document_methods);
+ class_entry = zend_register_internal_class_ex(&ce, class_entry_DOMDocument);
+
+ return class_entry;
+}
diff --git a/ext/dom/tests/HTML5/encoding/Document_GB18030.phpt b/ext/dom/tests/HTML5/encoding/Document_GB18030.phpt
new file mode 100644
index 0000000000000..01c347a08a5f7
--- /dev/null
+++ b/ext/dom/tests/HTML5/encoding/Document_GB18030.phpt
@@ -0,0 +1,37 @@
+--TEST--
+DOM\HTML5Document GB18030 encoding test
+--EXTENSIONS--
+dom
+--FILE--
+<?php
+
+$dom = new DOM\HTML5Document();
+$dom->loadHTMLFile(__DIR__ . "/gb18030.html");
+var_dump($dom->encoding);
+$dom->documentElement->firstChild->nextElementSibling->textContent = "é";
+$output = $dom->saveHTML();
+echo $output, "\n";
+$dom->saveHTMLFile(__DIR__ . "/gb18030_output.tmp");
+var_dump(file_get_contents(__DIR__ . "/gb18030_output.tmp") === $output);
+
+echo "--- After changing encoding to UTF-8 ---\n";
+$dom->encoding = "UTF-8";
+echo $dom->saveHTML(), "\n";
+
+?>
+--CLEAN--
+
+--EXPECT--
+string(7) "gb18030"
+
+
+
+
+bool(true)
+--- After changing encoding to UTF-8 ---
+
+
+
+é
diff --git a/ext/dom/tests/HTML5/encoding/Document_Shift_JIS.phpt b/ext/dom/tests/HTML5/encoding/Document_Shift_JIS.phpt
new file mode 100644
index 0000000000000..8a31721957bbb
--- /dev/null
+++ b/ext/dom/tests/HTML5/encoding/Document_Shift_JIS.phpt
@@ -0,0 +1,41 @@
+--TEST--
+DOM\HTML5Document Shift JIS encoding test
+--EXTENSIONS--
+dom
+--FILE--
+<?php
+
+$dom = new DOM\HTML5Document();
+$dom->loadHTMLFile(__DIR__ . "/shift_jis.html");
+var_dump($dom->encoding);
+$dom->documentElement->firstChild->nextElementSibling->textContent .= "é";
+$output = $dom->saveHTML();
+echo $output, "\n";
+$dom->saveHTMLFile(__DIR__ . "/shift_jis.tmp");
+var_dump(file_get_contents(__DIR__ . "/shift_jis.tmp") === $output);
+
+echo "--- After changing encoding to UTF-8 ---\n";
+$dom->encoding = "UTF-8";
+echo $dom->saveHTML(), "\n";
+
+?>
+--CLEAN--
+
+--EXPECT--
+string(9) "Shift_JIS"
+
+
+
+
+ ₠
+?
+bool(true)
+--- After changing encoding to UTF-8 ---
+
+
+
+
+ やあ
+é
diff --git a/ext/dom/tests/HTML5/encoding/Document_UTF16BE_BOM.phpt b/ext/dom/tests/HTML5/encoding/Document_UTF16BE_BOM.phpt
new file mode 100644
index 0000000000000..7a05541f89ff8
--- /dev/null
+++ b/ext/dom/tests/HTML5/encoding/Document_UTF16BE_BOM.phpt
@@ -0,0 +1,39 @@
+--TEST--
+DOM\HTML5Document UTF-16BE BOM encoding test
+--EXTENSIONS--
+dom
+--FILE--
+<?php
+
+$dom = new DOM\HTML5Document();
+$dom->loadHTMLFile(__DIR__ . "/utf16be_bom.html");
+var_dump($dom->encoding);
+$dom->documentElement->firstChild->nextElementSibling->textContent = "é";
+$output = $dom->saveHTML();
+echo $output, "\n";
+$dom->saveHTMLFile(__DIR__ . "/utf16be_bom_output.tmp");
+var_dump(file_get_contents(__DIR__ . "/utf16be_bom_output.tmp") === $output);
+
+echo "--- After changing encoding to UTF-8 ---\n";
+$dom->encoding = "UTF-8";
+echo $dom->saveHTML(), "\n";
+
+?>
+--CLEAN--
+
+--EXPECTF--
+string(8) "UTF-16BE"
+%0<%0!%0D%0O%0C%0T%0Y%0P%0E%0 %0h%0t%0m%0l%0>%0<%0h%0t%0m%0l%0>%0<%0h%0e%0a%0d%0>%0
+%0<%0!%0-%0-%0 %0i%0n%0t%0e%0n%0t%0i%0o%0n%0a%0l%0 %0l%0i%0e%0s%0 %0a%0n%0d%0 %0d%0e%0c%0e%0i%0t%0 %0-%0-%0>%0
+%0<%0m%0e%0t%0a%0 %0c%0h%0a%0r%0s%0e%0t%0=%0"%0u%0t%0f%0-%08%0"%0>%0
+%0<%0/%0h%0e%0a%0d%0>%0
+%0<%0b%0o%0d%0y%0>%0%0<%0/%0b%0o%0d%0y%0>%0<%0/%0h%0t%0m%0l%0>
+bool(true)
+--- After changing encoding to UTF-8 ---
+
+
+
+
+é
diff --git a/ext/dom/tests/HTML5/encoding/Document_UTF16LE_BOM.phpt b/ext/dom/tests/HTML5/encoding/Document_UTF16LE_BOM.phpt
new file mode 100644
index 0000000000000..c713a50846792
--- /dev/null
+++ b/ext/dom/tests/HTML5/encoding/Document_UTF16LE_BOM.phpt
@@ -0,0 +1,39 @@
+--TEST--
+DOM\HTML5Document UTF-16LE BOM encoding test
+--EXTENSIONS--
+dom
+--FILE--
+<?php
+
+$dom = new DOM\HTML5Document();
+$dom->loadHTMLFile(__DIR__ . "/utf16le_bom.html");
+var_dump($dom->encoding);
+$dom->documentElement->firstChild->nextElementSibling->textContent = "é";
+$output = $dom->saveHTML();
+echo $output, "\n";
+$dom->saveHTMLFile(__DIR__ . "/utf16le_bom_output.tmp");
+var_dump(file_get_contents(__DIR__ . "/utf16le_bom_output.tmp") === $output);
+
+echo "--- After changing encoding to UTF-8 ---\n";
+$dom->encoding = "UTF-8";
+echo $dom->saveHTML(), "\n";
+
+?>
+--CLEAN--
+
+--EXPECTF--
+string(8) "UTF-16LE"
+<%0!%0D%0O%0C%0T%0Y%0P%0E%0 %0h%0t%0m%0l%0>%0<%0h%0t%0m%0l%0>%0<%0h%0e%0a%0d%0>%0
+%0<%0!%0-%0-%0 %0i%0n%0t%0e%0n%0t%0i%0o%0n%0a%0l%0 %0l%0i%0e%0s%0 %0a%0n%0d%0 %0d%0e%0c%0e%0i%0t%0 %0-%0-%0>%0
+%0<%0m%0e%0t%0a%0 %0c%0h%0a%0r%0s%0e%0t%0=%0"%0u%0t%0f%0-%08%0"%0>%0
+%0<%0/%0h%0e%0a%0d%0>%0
+%0<%0b%0o%0d%0y%0>%0%0<%0/%0b%0o%0d%0y%0>%0<%0/%0h%0t%0m%0l%0>%0
+bool(true)
+--- After changing encoding to UTF-8 ---
+
+
+
+
+é
diff --git a/ext/dom/tests/HTML5/encoding/Document_UTF8_BOM.phpt b/ext/dom/tests/HTML5/encoding/Document_UTF8_BOM.phpt
new file mode 100644
index 0000000000000..f6901198f0327
--- /dev/null
+++ b/ext/dom/tests/HTML5/encoding/Document_UTF8_BOM.phpt
@@ -0,0 +1,39 @@
+--TEST--
+DOM\HTML5Document UTF-8 BOM encoding test
+--EXTENSIONS--
+dom
+--FILE--
+<?php
+
+$dom = new DOM\HTML5Document();
+$dom->loadHTMLFile(__DIR__ . "/utf8_bom.html");
+var_dump($dom->encoding);
+$dom->documentElement->firstChild->nextElementSibling->textContent = "é";
+$output = $dom->saveHTML();
+echo $output, "\n";
+$dom->saveHTMLFile(__DIR__ . "/utf8_bom_output.tmp");
+var_dump(file_get_contents(__DIR__ . "/utf8_bom_output.tmp") === $output);
+
+echo "--- After changing encoding to UTF-8 ---\n";
+$dom->encoding = "UTF-8";
+echo $dom->saveHTML(), "\n";
+
+?>
+--CLEAN--
+
+--EXPECT--
+string(5) "UTF-8"
+
+
+
+
+é
+bool(true)
+--- After changing encoding to UTF-8 ---
+
+
+
+
+é
diff --git a/ext/dom/tests/HTML5/encoding/Document_Windows1251.phpt b/ext/dom/tests/HTML5/encoding/Document_Windows1251.phpt
new file mode 100644
index 0000000000000..f631584f2f8df
--- /dev/null
+++ b/ext/dom/tests/HTML5/encoding/Document_Windows1251.phpt
@@ -0,0 +1,41 @@
+--TEST--
+DOM\HTML5Document Windows-1251 encoding test
+--EXTENSIONS--
+dom
+--FILE--
+<?php
+
+$dom = new DOM\HTML5Document();
+$dom->loadHTMLFile(__DIR__ . "/windows1251.html");
+var_dump($dom->encoding);
+$dom->documentElement->firstChild->nextElementSibling->textContent .= "é"; // Note: won't show up in Windows 1251 because it doesn't exist there
+$output = $dom->saveHTML();
+echo $output, "\n";
+$dom->saveHTMLFile(__DIR__ . "/windows1251_output.tmp");
+var_dump(file_get_contents(__DIR__ . "/windows1251_output.tmp") === $output);
+
+echo "--- After changing encoding to UTF-8 ---\n";
+$dom->encoding = "UTF-8";
+echo $dom->saveHTML(), "\n";
+
+?>
+--CLEAN--
+
+--EXPECT--
+string(12) "windows-1251"
+
+
+
+
+ A B C
+?
+bool(true)
+--- After changing encoding to UTF-8 ---
+
+
+
+
+ A ф B б C
+é
diff --git a/ext/dom/tests/HTML5/encoding/Document_encoding_edge_case_01.phpt b/ext/dom/tests/HTML5/encoding/Document_encoding_edge_case_01.phpt
new file mode 100644
index 0000000000000..069c46c162247
--- /dev/null
+++ b/ext/dom/tests/HTML5/encoding/Document_encoding_edge_case_01.phpt
@@ -0,0 +1,16 @@
+--TEST--
+DOM\HTML5Document edge case encoding 01
+--EXTENSIONS--
+dom
+--FILE--
+<?php
+
+// UTF-8 -> UTF-8
+$dom = new DOM\HTML5Document();
+// Create a UTF-8 string where a UTF-8 byte sequence falls over the boundary of the 4096 byte buffer
+$dom->append(str_repeat("A", 4096 - 2) . "\xf0\x90\x8d\x88AA");
+var_dump($dom->saveHTML());
+
+?>
+--EXPECT--
+string𐍈AA"
diff --git a/ext/dom/tests/HTML5/encoding/Document_encoding_edge_case_02.phpt b/ext/dom/tests/HTML5/encoding/Document_encoding_edge_case_02.phpt
new file mode 100644
index 0000000000000..3c6cf0e715547
--- /dev/null
+++ b/ext/dom/tests/HTML5/encoding/Document_encoding_edge_case_02.phpt
@@ -0,0 +1,28 @@
+--TEST--
+DOM\HTML5Document edge case encoding 02
+--EXTENSIONS--
+dom
+--FILE--
+<?php
+
+// UTF-8 -> GB18030
+$dom = new DOM\HTML5Document();
+$dom->encoding = "GB18030";
+// Create a UTF-8 string where a UTF-8 byte sequence falls over the boundary of the 4096 byte buffer
+// *and* the sequence also falls over the boundary for the result
+$dom->append(str_repeat("A", 4096 - 2) . "\xf0\x90\x8d\x88AA");
+var_dump($output = $dom->saveHTML());
+
+// GB18030 encoding of the above UTF-8 symbol
+var_dump($output[4094] == "\x90");
+var_dump($output[4095] == "\x30");
+var_dump($output[4096] == "\xd5");
+var_dump($output[4097] == "\x30");
+
+?>
+--EXPECT--
+string
+bool(true)
+bool(true)
+bool(true)
+bool(true)
diff --git a/ext/dom/tests/HTML5/encoding/Document_encoding_edge_case_03.phpt b/ext/dom/tests/HTML5/encoding/Document_encoding_edge_case_03.phpt
new file mode 100644
index 0000000000000..47ee2fc8b8b32
--- /dev/null
+++ b/ext/dom/tests/HTML5/encoding/Document_encoding_edge_case_03.phpt
@@ -0,0 +1,18 @@
+--TEST--
+DOM\HTML5Document edge case encoding 03
+--EXTENSIONS--
+dom
+--FILE--
+<?php
+
+// UTF-8 -> GB18030
+$dom = new DOM\HTML5Document();
+$dom->encoding = "GB18030";
+// Create a UTF-8 string where an invalid UTF-8 byte sequence falls over the boundary of the 4096 byte buffer
+// Note: the strange ?1?7 sequence is the GB18030 encoding for the unicode replacement character
+$dom->append(str_repeat("A", 4096 - 2) . "\xff\xff\xff");
+var_dump($dom->saveHTML());
+
+?>
+--EXPECT--
+string
diff --git a/ext/dom/tests/HTML5/encoding/Document_encoding_edge_case_04.phpt b/ext/dom/tests/HTML5/encoding/Document_encoding_edge_case_04.phpt
new file mode 100644
index 0000000000000..d3a16bde718ca
--- /dev/null
+++ b/ext/dom/tests/HTML5/encoding/Document_encoding_edge_case_04.phpt
@@ -0,0 +1,16 @@
+--TEST--
+DOM\HTML5Document edge case encoding 04
+--EXTENSIONS--
+dom
+--FILE--
+<?php
+
+// UTF-8 -> UTF-8
+$dom = new DOM\HTML5Document();
+// Create a UTF-8 string where an invalid UTF-8 byte sequence falls over the boundary of the 4096 byte buffer
+$dom->append(str_repeat("A", 4096 - 2) . "\xff\xff\xff");
+var_dump($dom->saveHTML());
+
+?>
+--EXPECT--
+string���"
diff --git a/ext/dom/tests/HTML5/encoding/Document_encoding_edge_case_05.phpt b/ext/dom/tests/HTML5/encoding/Document_encoding_edge_case_05.phpt
new file mode 100644
index 0000000000000..747789398aaf4
--- /dev/null
+++ b/ext/dom/tests/HTML5/encoding/Document_encoding_edge_case_05.phpt
@@ -0,0 +1,22 @@
+--TEST--
+DOM\HTML5Document edge case encoding 05
+--EXTENSIONS--
+dom
+--FILE--
+<?php
+
+// GB18030 -> UTF-8
+$dom = new DOM\HTML5Document();
+$header = " ";
+$padding_required_until_4094 = 4094 - strlen($header);
+// GB18030 byte sequence crossing the 4096 boundary
+$trailer = "\x90\x30\xd5\x30";
+$dom->loadHTML($header . str_repeat("A", $padding_required_until_4094) . $trailer);
+var_dump($dom->encoding);
+$dom->encoding = "UTF-8";
+var_dump($dom->saveHTML());
+
+?>
+--EXPECT--
+string(7) "gb18030"
+string(4112) " AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA𐍈"
diff --git a/ext/dom/tests/HTML5/encoding/Document_encoding_edge_case_06.phpt b/ext/dom/tests/HTML5/encoding/Document_encoding_edge_case_06.phpt
new file mode 100644
index 0000000000000..633a48329d70e
--- /dev/null
+++ b/ext/dom/tests/HTML5/encoding/Document_encoding_edge_case_06.phpt
@@ -0,0 +1,16 @@
+--TEST--
+DOM\HTML5Document edge case encoding 06
+--EXTENSIONS--
+dom
+--FILE--
+<?php
+
+// UTF-8 -> UTF-8
+$dom = new DOM\HTML5Document();
+// Create a UTF-8 string where a *broken* UTF-8 byte sequence falls over the boundary of the 4096 byte buffer
+$dom->append(str_repeat("A", 4096 - 1) . "\xf0\x90");
+var_dump($dom->saveHTML());
+
+?>
+--EXPECT--
+string��"
diff --git a/ext/dom/tests/HTML5/encoding/Document_encoding_edge_case_07.phpt b/ext/dom/tests/HTML5/encoding/Document_encoding_edge_case_07.phpt
new file mode 100644
index 0000000000000..9758da6a7d12f
--- /dev/null
+++ b/ext/dom/tests/HTML5/encoding/Document_encoding_edge_case_07.phpt
@@ -0,0 +1,22 @@
+--TEST--
+DOM\HTML5Document edge case encoding 07
+--EXTENSIONS--
+dom
+--FILE--
+ UTF-8
+$dom = new DOM\HTML5Document();
+$header = " ";
+$padding_required_until_4095 = 4095 - strlen($header);
+// GB18030 *broken* byte sequence crossing the 4096 boundary
+$trailer = "\x90\x30";
+$dom->loadHTML($header . str_repeat("A", $padding_required_until_4095) . $trailer);
+var_dump($dom->encoding);
+$dom->encoding = "UTF-8";
+var_dump($dom->saveHTML());
+
+?>
+--EXPECT--
+string(7) "gb18030"
+string��"
diff --git a/ext/dom/tests/HTML5/encoding/Document_encoding_field_test.phpt b/ext/dom/tests/HTML5/encoding/Document_encoding_field_test.phpt
new file mode 100644
index 0000000000000..c47c4b56c1980
--- /dev/null
+++ b/ext/dom/tests/HTML5/encoding/Document_encoding_field_test.phpt
@@ -0,0 +1,36 @@
+--TEST--
+DOM\HTML5Document test values for encoding field
+--EXTENSIONS--
+dom
+--FILE--
+encoding);
+$dom->encoding = "CSeuckr";
+var_dump($dom->encoding);
+try {
+ $dom->encoding = "nope";
+} catch (ValueError $e) {
+ echo $e->getMessage(), "\n";
+}
+var_dump($dom->encoding);
+$dom->encoding = "Windows-1251";
+var_dump($dom->encoding);
+try {
+ $dom->encoding = NULL;
+} catch (ValueError $e) {
+ echo $e->getMessage(), "\n";
+}
+var_dump($dom->encoding);
+echo $dom->saveHTML();
+
+?>
+--EXPECT--
+NULL
+string(6) "EUC-KR"
+Invalid document encoding
+string(6) "EUC-KR"
+string(12) "windows-1251"
+Invalid document encoding
+string(12) "windows-1251"
diff --git a/ext/dom/tests/HTML5/encoding/Document_encoding_unicode_error.phpt b/ext/dom/tests/HTML5/encoding/Document_encoding_unicode_error.phpt
new file mode 100644
index 0000000000000..22c7a90fc30c6
--- /dev/null
+++ b/ext/dom/tests/HTML5/encoding/Document_encoding_unicode_error.phpt
@@ -0,0 +1,27 @@
+--TEST--
+HTML5Document::loadHTML(File) with unicode codepoints resulting in an error
+--EXTENSIONS--
+dom
+--FILE--
+loadHTMLFile(__DIR__."/utf16le_error.html");
+echo "--- loadHTML ---\n";
+$dom->loadHTML(file_get_contents(__DIR__."/utf16le_error.html"));
+?>
+--EXPECTF--
+--- loadHTMLFile ---
+
+Warning: DOM\HTML5Document::loadHTMLFile(): tokenizer error missing-end-tag-name in %s, line: 7, column: 29 in %s on line %d
+
+Warning: DOM\HTML5Document::loadHTMLFile(): tree error unexpected-token in %s, line: 7, column: 14-17 in %s on line %d
+
+Warning: DOM\HTML5Document::loadHTMLFile(): tree error unexpected-token in %s, line: 8, column: 7-10 in %s on line %d
+--- loadHTML ---
+
+Warning: DOM\HTML5Document::loadHTML(): tokenizer error missing-end-tag-name in Entity, line: 7, column: 29 in %s on line %d
+
+Warning: DOM\HTML5Document::loadHTML(): tree error unexpected-token in Entity, line: 7, column: 14-17 in %s on line %d
+
+Warning: DOM\HTML5Document::loadHTML(): tree error unexpected-token in Entity, line: 8, column: 7-10 in %s on line %d
diff --git a/ext/dom/tests/HTML5/encoding/Document_fallback_encoding.phpt b/ext/dom/tests/HTML5/encoding/Document_fallback_encoding.phpt
new file mode 100644
index 0000000000000..f3ec81d8f25f1
--- /dev/null
+++ b/ext/dom/tests/HTML5/encoding/Document_fallback_encoding.phpt
@@ -0,0 +1,24 @@
+--TEST--
+DOM\HTML5Document fallback encoding test
+--EXTENSIONS--
+dom
+--FILE--
+loadHTMLFile(__DIR__ . "/fallback_encoding.html");
+var_dump($dom->encoding);
+echo $dom->saveHTML();
+
+?>
+--CLEAN--
+
+--EXPECT--
+string(5) "UTF-8"
+
+
+
+
+
diff --git a/ext/dom/tests/HTML5/encoding/Document_load_different_encoding.phpt b/ext/dom/tests/HTML5/encoding/Document_load_different_encoding.phpt
new file mode 100644
index 0000000000000..866e9223f9ef8
--- /dev/null
+++ b/ext/dom/tests/HTML5/encoding/Document_load_different_encoding.phpt
@@ -0,0 +1,19 @@
+--TEST--
+DOM\HTML5Document load document with different encoding
+--EXTENSIONS--
+dom
+--FILE--
+loadHTMLFile(__DIR__ . "/windows1251.html");
+var_dump($dom->encoding);
+$dom->loadHTML("hé
", LIBXML_NOERROR);
+var_dump($dom->encoding);
+echo $dom->saveHTML();
+
+?>
+--EXPECT--
+string(12) "windows-1251"
+string(5) "UTF-8"
+hé
diff --git a/ext/dom/tests/HTML5/encoding/fallback_encoding.html b/ext/dom/tests/HTML5/encoding/fallback_encoding.html
new file mode 100644
index 0000000000000..4191de914d2d2
--- /dev/null
+++ b/ext/dom/tests/HTML5/encoding/fallback_encoding.html
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/ext/dom/tests/HTML5/encoding/gb18030.html b/ext/dom/tests/HTML5/encoding/gb18030.html
new file mode 100644
index 0000000000000..423711cd5f335
--- /dev/null
+++ b/ext/dom/tests/HTML5/encoding/gb18030.html
@@ -0,0 +1,7 @@
+
+
+
+
+
+ Hllo, world!
+
\ No newline at end of file
diff --git a/ext/dom/tests/HTML5/encoding/shift_jis.html b/ext/dom/tests/HTML5/encoding/shift_jis.html
new file mode 100644
index 0000000000000..3da08c8f24b11
--- /dev/null
+++ b/ext/dom/tests/HTML5/encoding/shift_jis.html
@@ -0,0 +1,7 @@
+
+
+
+
+
+ ₠
+
\ No newline at end of file
diff --git a/ext/dom/tests/HTML5/encoding/utf16be_bom.html b/ext/dom/tests/HTML5/encoding/utf16be_bom.html
new file mode 100644
index 0000000000000000000000000000000000000000..9a50e89a57727fcd06cc56f0ff68a9c86967109a
GIT binary patch
literal 212
zcmZ9GF%H5o3`O6}DN=UC$^hJ@G(oA976fJB^3X>k7*Leh{{PwW>&>2q$c2uHCsz`I
zY(7vpveM6LtxT;NHl9GwD3zACw||r=>P07UH*3|RF$AYAtV$f_O8c{U(c3?$eEs*2Nx27
z!F;4}U`4A+bTU;nY&?O?B$bvo+ds(^{bD6?GwbxCF{*oSmJdsJoiysrQo-tAyxR3~
Y_N=)ZPqI7nN^k%6IQnOF<8Sq+-`P&MZ7Mo!m6G)9wu(hbs7)Ot5%Mw#HR&zZk^Mytts&Gp}96@IGjtMW=|
N_I0ML@8*5ut`EM0Ckp@o
literal 0
HcmV?d00001
diff --git a/ext/dom/tests/HTML5/encoding/utf8_bom.html b/ext/dom/tests/HTML5/encoding/utf8_bom.html
new file mode 100644
index 0000000000000..45319b3bda3a8
--- /dev/null
+++ b/ext/dom/tests/HTML5/encoding/utf8_bom.html
@@ -0,0 +1,7 @@
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/ext/dom/tests/HTML5/encoding/windows1251.html b/ext/dom/tests/HTML5/encoding/windows1251.html
new file mode 100644
index 0000000000000..928f5922c0971
--- /dev/null
+++ b/ext/dom/tests/HTML5/encoding/windows1251.html
@@ -0,0 +1,7 @@
+
+
+
+
+
+ A B C
+
\ No newline at end of file
diff --git a/ext/dom/tests/HTML5/interactions/Document_adopt_DOMDocument.phpt b/ext/dom/tests/HTML5/interactions/Document_adopt_DOMDocument.phpt
new file mode 100644
index 0000000000000..a0aa2ba771813
--- /dev/null
+++ b/ext/dom/tests/HTML5/interactions/Document_adopt_DOMDocument.phpt
@@ -0,0 +1,30 @@
+--TEST--
+DOM\HTML5Document adopts a DOMDocument
+--EXTENSIONS--
+dom
+--FILE--
+loadHTML(<<
+
+
+
+
+
+
+HTML);
+
+$dom2 = new DOM\HTML5Document();
+$dom2->appendChild($dom2->adoptNode($dom->documentElement));
+echo $dom2->saveHTML();
+
+?>
+--EXPECT--
+
+
+
+
+
+
diff --git a/ext/dom/tests/HTML5/interactions/Document_clone.phpt b/ext/dom/tests/HTML5/interactions/Document_clone.phpt
new file mode 100644
index 0000000000000..42c9e8bb9f788
--- /dev/null
+++ b/ext/dom/tests/HTML5/interactions/Document_clone.phpt
@@ -0,0 +1,31 @@
+--TEST--
+Cloning a DOM\HTML5Document
+--EXTENSIONS--
+dom
+--FILE--
+loadHTML("foo
");
+
+$dom2 = clone $dom;
+var_dump($dom2->firstChild->tagName);
+var_dump($dom2->firstChild->textContent);
+$dom2->loadHTML("bar ");
+var_dump($dom2->firstChild->tagName);
+var_dump($dom2->firstChild->textContent);
+
+$element = $dom2->firstChild;
+unset($dom2);
+var_dump(get_class($element->ownerDocument));
+
+?>
+--EXPECTF--
+Warning: DOM\HTML5Document::loadHTML(): tree error unexpected-token-in-initial-mode in Entity, line: 1, column: 2 in %s on line %d
+string(4) "html"
+string(3) "foo"
+
+Warning: DOM\HTML5Document::loadHTML(): tree error unexpected-token-in-initial-mode in Entity, line: 1, column: 2-7 in %s on line %d
+string(4) "html"
+string(3) "bar"
+string(17) "DOM\HTML5Document"
diff --git a/ext/dom/tests/HTML5/interactions/Document_node_ownerDocument_for_XML.phpt b/ext/dom/tests/HTML5/interactions/Document_node_ownerDocument_for_XML.phpt
new file mode 100644
index 0000000000000..a5b027113063a
--- /dev/null
+++ b/ext/dom/tests/HTML5/interactions/Document_node_ownerDocument_for_XML.phpt
@@ -0,0 +1,98 @@
+--TEST--
+HTML5Document getting ownerDocument from a node in an XML document should yield a HTML5Document
+--EXTENSIONS--
+dom
+--FILE--
+loadXML(' ');
+
+$element = $dom->documentElement;
+unset($dom);
+var_dump($element->ownerDocument);
+
+?>
+--EXPECTF--
+object(DOM\HTML5Document)#1 (40) {
+ ["encoding"]=>
+ NULL
+ ["doctype"]=>
+ NULL
+ ["implementation"]=>
+ string(22) "(object value omitted)"
+ ["documentElement"]=>
+ string(22) "(object value omitted)"
+ ["actualEncoding"]=>
+ NULL
+ ["xmlEncoding"]=>
+ NULL
+ ["standalone"]=>
+ bool(false)
+ ["xmlStandalone"]=>
+ bool(false)
+ ["version"]=>
+ string(3) "1.0"
+ ["xmlVersion"]=>
+ string(3) "1.0"
+ ["strictErrorChecking"]=>
+ bool(true)
+ ["documentURI"]=>
+ string(%d) %s
+ ["config"]=>
+ NULL
+ ["formatOutput"]=>
+ bool(false)
+ ["validateOnParse"]=>
+ bool(false)
+ ["resolveExternals"]=>
+ bool(false)
+ ["preserveWhiteSpace"]=>
+ bool(true)
+ ["recover"]=>
+ bool(false)
+ ["substituteEntities"]=>
+ bool(false)
+ ["firstElementChild"]=>
+ string(22) "(object value omitted)"
+ ["lastElementChild"]=>
+ string(22) "(object value omitted)"
+ ["childElementCount"]=>
+ int(1)
+ ["nodeName"]=>
+ string(9) "#document"
+ ["nodeValue"]=>
+ NULL
+ ["nodeType"]=>
+ int(9)
+ ["parentNode"]=>
+ NULL
+ ["parentElement"]=>
+ NULL
+ ["childNodes"]=>
+ string(22) "(object value omitted)"
+ ["firstChild"]=>
+ string(22) "(object value omitted)"
+ ["lastChild"]=>
+ string(22) "(object value omitted)"
+ ["previousSibling"]=>
+ NULL
+ ["nextSibling"]=>
+ NULL
+ ["attributes"]=>
+ NULL
+ ["isConnected"]=>
+ bool(true)
+ ["ownerDocument"]=>
+ NULL
+ ["namespaceURI"]=>
+ NULL
+ ["prefix"]=>
+ string(0) ""
+ ["localName"]=>
+ NULL
+ ["baseURI"]=>
+ string(%d) %s
+ ["textContent"]=>
+ string(0) ""
+}
diff --git a/ext/dom/tests/HTML5/interactions/Document_registerNodeClass_01.phpt b/ext/dom/tests/HTML5/interactions/Document_registerNodeClass_01.phpt
new file mode 100644
index 0000000000000..398d80393b323
--- /dev/null
+++ b/ext/dom/tests/HTML5/interactions/Document_registerNodeClass_01.phpt
@@ -0,0 +1,28 @@
+--TEST--
+DOM\HTML5Document::registerNodeClass 01
+--EXTENSIONS--
+dom
+--FILE--
+firstChild->textContent;
+ }
+}
+
+$dom = new DOM\HTML5Document();
+$dom->registerNodeClass("DOM\HTML5Document", "CustomDOMHTML5Document");
+$dom->loadHTML("foo
", LIBXML_NOERROR);
+
+$element = $dom->documentElement;
+unset($dom);
+
+$dom = $element->ownerDocument;
+var_dump($dom instanceof CustomDOMHTML5Document);
+var_dump($dom->test());
+
+?>
+--EXPECT--
+bool(true)
+string(3) "foo"
diff --git a/ext/dom/tests/HTML5/interactions/Document_registerNodeClass_02.phpt b/ext/dom/tests/HTML5/interactions/Document_registerNodeClass_02.phpt
new file mode 100644
index 0000000000000..61566a97772ec
--- /dev/null
+++ b/ext/dom/tests/HTML5/interactions/Document_registerNodeClass_02.phpt
@@ -0,0 +1,17 @@
+--TEST--
+DOM\HTML5Document::registerNodeClass 02
+--EXTENSIONS--
+dom
+--FILE--
+registerNodeClass("DOM\\HTML5Document", "DOMDocument");
+
+?>
+--EXPECTF--
+Fatal error: Uncaught Error: DOMDocument::registerNodeClass(): Argument #2 ($extendedClass) must be a class name derived from DOM\HTML5Document or null, DOMDocument given in %s:%d
+Stack trace:
+#0 %s(%d): DOMDocument->registerNodeClass('DOM\\HTML5Docume...', 'DOMDocument')
+#1 {main}
+ thrown in %s on line %d
diff --git a/ext/dom/tests/HTML5/interactions/Document_registerNodeClass_03.phpt b/ext/dom/tests/HTML5/interactions/Document_registerNodeClass_03.phpt
new file mode 100644
index 0000000000000..10609846de093
--- /dev/null
+++ b/ext/dom/tests/HTML5/interactions/Document_registerNodeClass_03.phpt
@@ -0,0 +1,26 @@
+--TEST--
+DOM\HTML5Document::registerNodeClass 03
+--EXTENSIONS--
+dom
+--FILE--
+registerNodeClass("DOMDocument", "DOM\\HTML5Document");
+
+$element = $dom->appendChild($dom->createElement("foo"));
+unset($dom);
+
+var_dump(get_class($element->ownerDocument));
+
+$dom = $element->ownerDocument;
+unset($element);
+$element = $dom->documentElement;
+unset($dom);
+
+var_dump(get_class($element->ownerDocument));
+
+?>
+--EXPECT--
+string(17) "DOM\HTML5Document"
+string(17) "DOM\HTML5Document"
diff --git a/ext/dom/tests/HTML5/interactions/Document_should_retain_properties_and_owner_01.phpt b/ext/dom/tests/HTML5/interactions/Document_should_retain_properties_and_owner_01.phpt
new file mode 100644
index 0000000000000..6f6686384ead3
--- /dev/null
+++ b/ext/dom/tests/HTML5/interactions/Document_should_retain_properties_and_owner_01.phpt
@@ -0,0 +1,107 @@
+--TEST--
+HTML5 document should retain properties and ownerDocument relation 01
+--EXTENSIONS--
+dom
+--FILE--
+strictErrorChecking = false;
+$dom->loadHTML("foo
", LIBXML_NOERROR);
+
+// Destroy reference to the DOM
+$child = $dom->documentElement;
+unset($dom);
+
+// Regain reference using the ownerDocument property
+// Should be a DOM\HTML5Document
+$dom = $child->ownerDocument;
+var_dump($dom);
+// Test if property is preserved (any random doc_props property will do)
+var_dump($dom->strictErrorChecking);
+
+?>
+--EXPECT--
+object(DOM\HTML5Document)#1 (40) {
+ ["encoding"]=>
+ string(5) "UTF-8"
+ ["doctype"]=>
+ NULL
+ ["implementation"]=>
+ string(22) "(object value omitted)"
+ ["documentElement"]=>
+ string(22) "(object value omitted)"
+ ["actualEncoding"]=>
+ string(5) "UTF-8"
+ ["xmlEncoding"]=>
+ string(5) "UTF-8"
+ ["standalone"]=>
+ bool(true)
+ ["xmlStandalone"]=>
+ bool(true)
+ ["version"]=>
+ NULL
+ ["xmlVersion"]=>
+ NULL
+ ["strictErrorChecking"]=>
+ bool(false)
+ ["documentURI"]=>
+ NULL
+ ["config"]=>
+ NULL
+ ["formatOutput"]=>
+ bool(false)
+ ["validateOnParse"]=>
+ bool(false)
+ ["resolveExternals"]=>
+ bool(false)
+ ["preserveWhiteSpace"]=>
+ bool(true)
+ ["recover"]=>
+ bool(false)
+ ["substituteEntities"]=>
+ bool(false)
+ ["firstElementChild"]=>
+ string(22) "(object value omitted)"
+ ["lastElementChild"]=>
+ string(22) "(object value omitted)"
+ ["childElementCount"]=>
+ int(1)
+ ["nodeName"]=>
+ string(9) "#document"
+ ["nodeValue"]=>
+ NULL
+ ["nodeType"]=>
+ int(13)
+ ["parentNode"]=>
+ NULL
+ ["parentElement"]=>
+ NULL
+ ["childNodes"]=>
+ string(22) "(object value omitted)"
+ ["firstChild"]=>
+ string(22) "(object value omitted)"
+ ["lastChild"]=>
+ string(22) "(object value omitted)"
+ ["previousSibling"]=>
+ NULL
+ ["nextSibling"]=>
+ NULL
+ ["attributes"]=>
+ NULL
+ ["isConnected"]=>
+ bool(true)
+ ["ownerDocument"]=>
+ NULL
+ ["namespaceURI"]=>
+ NULL
+ ["prefix"]=>
+ string(0) ""
+ ["localName"]=>
+ NULL
+ ["baseURI"]=>
+ NULL
+ ["textContent"]=>
+ string(3) "foo"
+}
+bool(false)
diff --git a/ext/dom/tests/HTML5/interactions/Document_should_retain_properties_and_owner_02.phpt b/ext/dom/tests/HTML5/interactions/Document_should_retain_properties_and_owner_02.phpt
new file mode 100644
index 0000000000000..676c127e167d3
--- /dev/null
+++ b/ext/dom/tests/HTML5/interactions/Document_should_retain_properties_and_owner_02.phpt
@@ -0,0 +1,106 @@
+--TEST--
+HTML5 document should retain properties and ownerDocument relation 02
+--EXTENSIONS--
+dom
+--FILE--
+strictErrorChecking = false;
+$child = $dom->appendChild($dom->createElement('html'));
+
+// Destroy reference to the DOM
+unset($dom);
+
+// Regain reference using the ownerDocument property
+// Should be a DOM\HTML5Document
+$dom = $child->ownerDocument;
+var_dump($dom);
+// Test if property is preserved (any random doc_props property will do)
+var_dump($dom->strictErrorChecking);
+
+?>
+--EXPECT--
+object(DOM\HTML5Document)#1 (40) {
+ ["encoding"]=>
+ NULL
+ ["doctype"]=>
+ NULL
+ ["implementation"]=>
+ string(22) "(object value omitted)"
+ ["documentElement"]=>
+ string(22) "(object value omitted)"
+ ["actualEncoding"]=>
+ NULL
+ ["xmlEncoding"]=>
+ NULL
+ ["standalone"]=>
+ bool(false)
+ ["xmlStandalone"]=>
+ bool(false)
+ ["version"]=>
+ string(3) "1.0"
+ ["xmlVersion"]=>
+ string(3) "1.0"
+ ["strictErrorChecking"]=>
+ bool(false)
+ ["documentURI"]=>
+ NULL
+ ["config"]=>
+ NULL
+ ["formatOutput"]=>
+ bool(false)
+ ["validateOnParse"]=>
+ bool(false)
+ ["resolveExternals"]=>
+ bool(false)
+ ["preserveWhiteSpace"]=>
+ bool(true)
+ ["recover"]=>
+ bool(false)
+ ["substituteEntities"]=>
+ bool(false)
+ ["firstElementChild"]=>
+ string(22) "(object value omitted)"
+ ["lastElementChild"]=>
+ string(22) "(object value omitted)"
+ ["childElementCount"]=>
+ int(1)
+ ["nodeName"]=>
+ string(9) "#document"
+ ["nodeValue"]=>
+ NULL
+ ["nodeType"]=>
+ int(9)
+ ["parentNode"]=>
+ NULL
+ ["parentElement"]=>
+ NULL
+ ["childNodes"]=>
+ string(22) "(object value omitted)"
+ ["firstChild"]=>
+ string(22) "(object value omitted)"
+ ["lastChild"]=>
+ string(22) "(object value omitted)"
+ ["previousSibling"]=>
+ NULL
+ ["nextSibling"]=>
+ NULL
+ ["attributes"]=>
+ NULL
+ ["isConnected"]=>
+ bool(true)
+ ["ownerDocument"]=>
+ NULL
+ ["namespaceURI"]=>
+ NULL
+ ["prefix"]=>
+ string(0) ""
+ ["localName"]=>
+ NULL
+ ["baseURI"]=>
+ NULL
+ ["textContent"]=>
+ string(0) ""
+}
+bool(false)
diff --git a/ext/dom/tests/HTML5/parser/Document_loadHTMLFile_DOM_HTML_NO_DEFAULT_NS copy.phpt b/ext/dom/tests/HTML5/parser/Document_loadHTMLFile_DOM_HTML_NO_DEFAULT_NS copy.phpt
new file mode 100644
index 0000000000000..ce8a38c8b5230
--- /dev/null
+++ b/ext/dom/tests/HTML5/parser/Document_loadHTMLFile_DOM_HTML_NO_DEFAULT_NS copy.phpt
@@ -0,0 +1,39 @@
+--TEST--
+Document::loadHTMLFile() with DOM\HTML_NO_DEFAULT_NS
+--EXTENSIONS--
+dom
+--FILE--
+loadHTMLFile(__DIR__ . "/paragraph.html", LIBXML_HTML_NOIMPLIED | LIBXML_NOERROR);
+$xpath = new DOMXPath($dom);
+$xpath->registerNamespace("x", "http://www.w3.org/1999/xhtml");
+var_dump($xpath->query("//p"));
+var_dump($xpath->query("//x:p"));
+
+$dom = new DOM\HTML5Document();
+$dom->loadHTMLFile(__DIR__ . "/paragraph.html", LIBXML_HTML_NOIMPLIED | LIBXML_NOERROR | DOM\HTML_NO_DEFAULT_NS);
+$xpath = new DOMXPath($dom);
+$xpath->registerNamespace("x", "http://www.w3.org/1999/xhtml");
+var_dump($xpath->query("//p"));
+var_dump($xpath->query("//x:p"));
+
+?>
+--EXPECT--
+object(DOMNodeList)#3 (1) {
+ ["length"]=>
+ int(0)
+}
+object(DOMNodeList)#4 (1) {
+ ["length"]=>
+ int(1)
+}
+object(DOMNodeList)#3 (1) {
+ ["length"]=>
+ int(1)
+}
+object(DOMNodeList)#3 (1) {
+ ["length"]=>
+ int(0)
+}
diff --git a/ext/dom/tests/HTML5/parser/Document_loadHTMLFile_empty_path.phpt b/ext/dom/tests/HTML5/parser/Document_loadHTMLFile_empty_path.phpt
new file mode 100644
index 0000000000000..6b71390fe183b
--- /dev/null
+++ b/ext/dom/tests/HTML5/parser/Document_loadHTMLFile_empty_path.phpt
@@ -0,0 +1,19 @@
+--TEST--
+DOM\HTML5Document::loadHTMLFile() - empty path
+--EXTENSIONS--
+dom
+--FILE--
+loadHTMLFile("");
+echo $dom->saveHTML(), "\n";
+
+?>
+--EXPECTF--
+Fatal error: Uncaught ValueError: Path cannot be empty in %s:%d
+Stack trace:
+#0 %s(%d): DOM\HTML5Document->loadHTMLFile('')
+#1 {main}
+ thrown in %s on line %d
diff --git a/ext/dom/tests/HTML5/parser/Document_loadHTMLFile_local_existing_file.phpt b/ext/dom/tests/HTML5/parser/Document_loadHTMLFile_local_existing_file.phpt
new file mode 100644
index 0000000000000..5bb31af73ab12
--- /dev/null
+++ b/ext/dom/tests/HTML5/parser/Document_loadHTMLFile_local_existing_file.phpt
@@ -0,0 +1,24 @@
+--TEST--
+DOM\HTML5Document::loadHTMLFile() - local existing file
+--EXTENSIONS--
+dom
+--FILE--
+loadHTMLFile(__DIR__ . "/../../test.html");
+echo $dom->saveHTML(), "\n";
+
+?>
+--EXPECTF--
+Warning: DOM\HTML5Document::loadHTMLFile(): tree error unexpected-token-in-initial-mode in %s on line %d
+
+Hello world
+
+
+This is a not well-formed
+html files with undeclared entities
+
+
+
diff --git a/ext/dom/tests/HTML5/parser/Document_loadHTMLFile_local_file_does_not_exist.phpt b/ext/dom/tests/HTML5/parser/Document_loadHTMLFile_local_file_does_not_exist.phpt
new file mode 100644
index 0000000000000..9bc624a8d9109
--- /dev/null
+++ b/ext/dom/tests/HTML5/parser/Document_loadHTMLFile_local_file_does_not_exist.phpt
@@ -0,0 +1,15 @@
+--TEST--
+DOM\HTML5Document::loadHTMLFile() - local file that does not exist
+--EXTENSIONS--
+dom
+--FILE--
+loadHTMLFile(__DIR__ . "/foobar");
+echo $dom->saveHTML(), "\n";
+
+?>
+--EXPECTF--
+Warning: DOM\HTML5Document::loadHTMLFile(%s): Failed to open stream: No such file or directory in %s on line %d
diff --git a/ext/dom/tests/HTML5/parser/Document_loadHTMLFile_nul_terminator_cases_path.phpt b/ext/dom/tests/HTML5/parser/Document_loadHTMLFile_nul_terminator_cases_path.phpt
new file mode 100644
index 0000000000000..cdad0bf1c28a2
--- /dev/null
+++ b/ext/dom/tests/HTML5/parser/Document_loadHTMLFile_nul_terminator_cases_path.phpt
@@ -0,0 +1,21 @@
+--TEST--
+DOM\HTML5Document::loadHTMLFile() - NUL terminator cases path
+--EXTENSIONS--
+dom
+--FILE--
+loadHTMLFile("\0");
+} catch (Error $e) {
+ echo $e->getMessage(), "\n";
+}
+$dom->loadHTMLFile('%00');
+
+?>
+--EXPECTF--
+DOM\HTML5Document::loadHTMLFile(): Argument #1 ($filename) must not contain any null bytes
+
+Warning: DOM\HTML5Document::loadHTMLFile(): URI must not contain percent-encoded NUL bytes in %s on line %d
diff --git a/ext/dom/tests/HTML5/parser/Document_loadHTMLFile_parser_warning_01.phpt b/ext/dom/tests/HTML5/parser/Document_loadHTMLFile_parser_warning_01.phpt
new file mode 100644
index 0000000000000..8b95d43205677
--- /dev/null
+++ b/ext/dom/tests/HTML5/parser/Document_loadHTMLFile_parser_warning_01.phpt
@@ -0,0 +1,20 @@
+--TEST--
+DOM\HTML5Document::loadHTMLFile() - parser warning 01
+--EXTENSIONS--
+dom
+--FILE--
+loadHTMLFile(__DIR__."/parser_warning_01.html", LIBXML_NOERROR);
+echo $dom->saveHTML(), "\n";
+
+?>
+--EXPECT--
+foo
+
+
+
+
+
+error
diff --git a/ext/dom/tests/HTML5/parser/Document_loadHTMLFile_parser_warning_02.phpt b/ext/dom/tests/HTML5/parser/Document_loadHTMLFile_parser_warning_02.phpt
new file mode 100644
index 0000000000000..de2c9ce8b16b0
--- /dev/null
+++ b/ext/dom/tests/HTML5/parser/Document_loadHTMLFile_parser_warning_02.phpt
@@ -0,0 +1,21 @@
+--TEST--
+DOM\HTML5Document::loadHTMLFile() - parser warning 02
+--EXTENSIONS--
+dom
+--FILE--
+loadHTMLFile(__DIR__."/parser_warning_02.html", LIBXML_NOERROR);
+echo $dom->saveHTML(), "\n";
+
+?>
+--EXPECT--
+
+ foo
+
+
+ -->
+
+
+
diff --git a/ext/dom/tests/HTML5/parser/Document_loadHTMLFile_parser_warning_03.phpt b/ext/dom/tests/HTML5/parser/Document_loadHTMLFile_parser_warning_03.phpt
new file mode 100644
index 0000000000000..7b58fee4ca484
--- /dev/null
+++ b/ext/dom/tests/HTML5/parser/Document_loadHTMLFile_parser_warning_03.phpt
@@ -0,0 +1,17 @@
+--TEST--
+DOM\HTML5Document::loadHTMLFile() - parser warning 03
+--EXTENSIONS--
+dom
+--FILE--
+loadHTMLFile(__DIR__."/parser_warning_03.html", LIBXML_NOERROR);
+echo $dom->saveHTML(), "\n";
+
+?>
+--EXPECT--
+
+
+
+
diff --git a/ext/dom/tests/HTML5/parser/Document_loadHTMLFile_with_failing_stream_wrapper.phpt b/ext/dom/tests/HTML5/parser/Document_loadHTMLFile_with_failing_stream_wrapper.phpt
new file mode 100644
index 0000000000000..31bd37e18b23f
--- /dev/null
+++ b/ext/dom/tests/HTML5/parser/Document_loadHTMLFile_with_failing_stream_wrapper.phpt
@@ -0,0 +1,51 @@
+--TEST--
+DOM\HTML5Document::loadHTMLFile() with failing stream wrapper
+--EXTENSIONS--
+dom
+--FILE--
+fail) {
+ throw new Error("fail");
+ }
+ $this->fail = true;
+ return str_repeat("X", $count);
+ }
+
+ public function stream_eof() {
+ return false;
+ }
+
+ public function stream_close() {
+ return true;
+ }
+}
+
+stream_wrapper_register("fail", FailingWrapper::class, 0);
+
+$dom = new DOM\HTML5Document();
+
+try {
+ $dom->loadHTMLFile("fail://x");
+} catch (Exception $e) {
+ echo $e->getMessage(), "\n";
+}
+echo $dom->saveHTML(), "\n";
+
+?>
+--EXPECTF--
+Fatal error: Uncaught Error: fail in %s:%d
+Stack trace:
+#0 [internal function]: FailingWrapper->stream_read(8192)
+#1 %s(%d): DOM\HTML5Document->loadHTMLFile('fail://x')
+#2 {main}
+ thrown in %s on line %d
diff --git a/ext/dom/tests/HTML5/parser/Document_loadHTMLFile_with_working_stream_wrapper.phpt b/ext/dom/tests/HTML5/parser/Document_loadHTMLFile_with_working_stream_wrapper.phpt
new file mode 100644
index 0000000000000..d18ef6ad6a1d9
--- /dev/null
+++ b/ext/dom/tests/HTML5/parser/Document_loadHTMLFile_with_working_stream_wrapper.phpt
@@ -0,0 +1,62 @@
+--TEST--
+DOM\HTML5Document::loadHTMLFile() with working stream wrapper
+--EXTENSIONS--
+dom
+--FILE--
+data = substr($path, 6);
+ return true;
+ }
+
+ public function stream_read($count) {
+ $ret = substr($this->data, $this->position, $count);
+ $this->position += $count;
+ return $ret;
+ }
+
+ public function stream_eof() {
+ return $this->position >= strlen($this->data);
+ }
+
+ public function stream_close() {
+ return true;
+ }
+}
+
+stream_wrapper_register("euw", EchoUriWrapper::class, 0);
+
+$dom = new DOM\HTML5Document();
+
+echo "--- Stream wrapper case ---\n";
+
+$dom->loadHTMLFile("euw://hello
");
+echo $dom->saveHTML(), "\n";
+
+echo "--- Stream wrapper in two chunks case ---\n";
+
+libxml_use_internal_errors(true);
+// To properly test this, keep the 4096 in sync with document.c's input stream buffer size.
+$dom->loadHTMLFile("euw://" . str_repeat("\n", 4096-22) . "<>");
+echo $dom->saveHTML(), "\n";
+
+foreach (libxml_get_errors() as $error) {
+ var_dump($error->line, $error->column);
+}
+
+?>
+--EXPECTF--
+--- Stream wrapper case ---
+
+Warning: DOM\HTML5Document::loadHTMLFile(): tree error unexpected-token-in-initial-mode in euw://hello
, line: 1, column: 2 in %s on line %d
+hello
+--- Stream wrapper in two chunks case ---
+<>
+int(4075)
+int(2)
diff --git a/ext/dom/tests/HTML5/parser/Document_loadHTML_DOM_HTML_NO_DEFAULT_NS.phpt b/ext/dom/tests/HTML5/parser/Document_loadHTML_DOM_HTML_NO_DEFAULT_NS.phpt
new file mode 100644
index 0000000000000..fbb266afaa8a2
--- /dev/null
+++ b/ext/dom/tests/HTML5/parser/Document_loadHTML_DOM_HTML_NO_DEFAULT_NS.phpt
@@ -0,0 +1,39 @@
+--TEST--
+Document::loadHTML() with DOM\HTML_NO_DEFAULT_NS
+--EXTENSIONS--
+dom
+--FILE--
+loadHTML(file_get_contents(__DIR__ . "/paragraph.html"), LIBXML_HTML_NOIMPLIED | LIBXML_NOERROR);
+$xpath = new DOMXPath($dom);
+$xpath->registerNamespace("x", "http://www.w3.org/1999/xhtml");
+var_dump($xpath->query("//p"));
+var_dump($xpath->query("//x:p"));
+
+$dom = new DOM\HTML5Document();
+$dom->loadHTML(file_get_contents(__DIR__ . "/paragraph.html"), LIBXML_HTML_NOIMPLIED | LIBXML_NOERROR | DOM\HTML_NO_DEFAULT_NS);
+$xpath = new DOMXPath($dom);
+$xpath->registerNamespace("x", "http://www.w3.org/1999/xhtml");
+var_dump($xpath->query("//p"));
+var_dump($xpath->query("//x:p"));
+
+?>
+--EXPECT--
+object(DOMNodeList)#3 (1) {
+ ["length"]=>
+ int(0)
+}
+object(DOMNodeList)#4 (1) {
+ ["length"]=>
+ int(1)
+}
+object(DOMNodeList)#3 (1) {
+ ["length"]=>
+ int(1)
+}
+object(DOMNodeList)#3 (1) {
+ ["length"]=>
+ int(0)
+}
diff --git a/ext/dom/tests/HTML5/parser/Document_loadHTML_LIBXML_COMPACT.phpt b/ext/dom/tests/HTML5/parser/Document_loadHTML_LIBXML_COMPACT.phpt
new file mode 100644
index 0000000000000..06ae52ee862bb
--- /dev/null
+++ b/ext/dom/tests/HTML5/parser/Document_loadHTML_LIBXML_COMPACT.phpt
@@ -0,0 +1,41 @@
+--TEST--
+HTML5Document::loadHTML() with LIBXML_COMPACT
+--EXTENSIONS--
+dom
+--FILE--
+loadHTML(<<
+
+
+
+
+ x
+ foo
+ foox
+ fooxx
+ fooxxx
+ fooxxxx
+ fooxxxxx
+ this does not fit
+
+
+HTML, LIBXML_COMPACT);
+
+$xpath = new DOMXPath($dom);
+foreach ($xpath->query("//*[name()='p']") as $p) {
+ echo $p->textContent, "\n";
+}
+
+?>
+--EXPECT--
+x
+foo
+foox
+fooxx
+fooxxx
+fooxxxx
+fooxxxxx
+this does not fit
diff --git a/ext/dom/tests/HTML5/parser/Document_loadHTML_LIBXML_HTML_NOIMPLIED.phpt b/ext/dom/tests/HTML5/parser/Document_loadHTML_LIBXML_HTML_NOIMPLIED.phpt
new file mode 100644
index 0000000000000..6d11a41b727c4
--- /dev/null
+++ b/ext/dom/tests/HTML5/parser/Document_loadHTML_LIBXML_HTML_NOIMPLIED.phpt
@@ -0,0 +1,93 @@
+--TEST--
+DOM\HTML5Document::loadHTML() with LIBXML_HTML_NOIMPLIED
+--EXTENSIONS--
+dom
+--FILE--
+loadHTML($html, LIBXML_HTML_NOIMPLIED | LIBXML_NOERROR);
+ $output = $dom->saveHTML();
+ echo $output, "\n";
+
+ // Also test the loadHTMLFile variation. We won't print out the result, just checking the result is the same.
+ $temp = fopen(__DIR__."/DOM_HTML5Document_loadHTML_LIBXML_HTML_NOIMPLIED_input.tmp", "w");
+ fwrite($temp, $html);
+ fclose($temp);
+ $dom->loadHTMLFile(__DIR__."/DOM_HTML5Document_loadHTML_LIBXML_HTML_NOIMPLIED_input.tmp", LIBXML_HTML_NOIMPLIED | LIBXML_NOERROR);
+ var_dump($output === $dom->saveHTML());
+}
+
+$dom = new DOM\HTML5Document();
+
+echo "--- Missing html, head, body ---\n";
+test("");
+test("foobarbaz");
+test("foo
");
+echo "--- Missing html, head ---\n";
+test("foo
");
+test("x foo
");
+echo "--- Missing html, body ---\n";
+test("x foo
");
+echo "--- Missing html ---\n";
+test("x foo
");
+echo "--- Missing head, body ---\n";
+test("foobar");
+test("a foo
");
+echo "--- Missing head ---\n";
+test("hi");
+echo "--- Missing nothing ---\n";
+test("x foo
");
+echo "--- Malformed document ---\n";
+test("foo");
+?>
+--CLEAN--
+
+--EXPECT--
+--- Missing html, head, body ---
+Testing:
+
+bool(true)
+Testing: foobarbaz
+foobarbaz
+bool(true)
+Testing: foo
+foo
+bool(true)
+--- Missing html, head ---
+Testing: foo
+foo
+bool(true)
+Testing: x foo
+x foo
+bool(true)
+--- Missing html, body ---
+Testing: x foo
+x foo
+bool(true)
+--- Missing html ---
+Testing: x foo
+x foo
+bool(true)
+--- Missing head, body ---
+Testing: foobar
+foobar
+bool(true)
+Testing: a foo
+a foo
+bool(true)
+--- Missing head ---
+Testing: hi
+hi
+bool(true)
+--- Missing nothing ---
+Testing: x foo
+x foo
+bool(true)
+--- Malformed document ---
+Testing: foo
+foo
+bool(true)
diff --git a/ext/dom/tests/HTML5/parser/Document_loadHTML_LIBXML_HTML_NOIMPLIED_namespace.phpt b/ext/dom/tests/HTML5/parser/Document_loadHTML_LIBXML_HTML_NOIMPLIED_namespace.phpt
new file mode 100644
index 0000000000000..745b8e98899b2
--- /dev/null
+++ b/ext/dom/tests/HTML5/parser/Document_loadHTML_LIBXML_HTML_NOIMPLIED_namespace.phpt
@@ -0,0 +1,17 @@
+--TEST--
+DOM\HTML5Document::loadHTML() with LIBXML_HTML_NOIMPLIED namespace check
+--EXTENSIONS--
+dom
+--FILE--
+loadHTML("foo
", LIBXML_HTML_NOIMPLIED | LIBXML_NOERROR);
+echo $dom->saveXML();
+var_dump($dom->documentElement->namespaceURI);
+
+?>
+--EXPECT--
+
+foo
+string(28) "http://www.w3.org/1999/xhtml"
diff --git a/ext/dom/tests/HTML5/parser/Document_loadHTML_empty.phpt b/ext/dom/tests/HTML5/parser/Document_loadHTML_empty.phpt
new file mode 100644
index 0000000000000..885b1b69d062f
--- /dev/null
+++ b/ext/dom/tests/HTML5/parser/Document_loadHTML_empty.phpt
@@ -0,0 +1,15 @@
+--TEST--
+DOM\HTML5Document::loadHTML() - empty document
+--EXTENSIONS--
+dom
+--FILE--
+loadHTML('');
+echo $dom->saveHTML(), "\n";
+
+?>
+--EXPECT--
+
diff --git a/ext/dom/tests/HTML5/parser/Document_loadHTML_line_column.phpt b/ext/dom/tests/HTML5/parser/Document_loadHTML_line_column.phpt
new file mode 100644
index 0000000000000..86c8b33ef2d78
--- /dev/null
+++ b/ext/dom/tests/HTML5/parser/Document_loadHTML_line_column.phpt
@@ -0,0 +1,59 @@
+--TEST--
+DOM\HTML5Document::loadHTML() - line and column test
+--EXTENSIONS--
+dom
+--FILE--
+loadHTML(<<
+
+
+ foo
+
+
+
+
+ This is my paragraph
+
+
+
+
+
+HTML);
+
+$xpath = new DOMXPath($dom);
+
+foreach ($xpath->query("//*") as $element) {
+ echo "Element: '", $element->tagName, "', ", $element->getLineNo(), "\n";
+}
+
+foreach ($xpath->query("//*[name()='strong']") as $element) {
+ echo "Text: '", $element->textContent, "', ", $element->firstChild->getLineNo(), "\n";
+}
+
+foreach ($xpath->query("//*[name()='div']") as $element) {
+ foreach ($element->attributes as $attribute) {
+ echo "Attribute: '", $attribute->nodeName, "', ", $attribute->getLineNo(), "\n";
+ }
+}
+
+foreach ($xpath->query("//comment()") as $comment) {
+ echo "Comment: '", $comment->data, "', ", $comment->getLineNo(), "\n";
+}
+
+?>
+--EXPECT--
+Element: 'html', 1
+Element: 'head', 2
+Element: 'title', 3
+Element: 'body', 5
+Element: 'div', 6
+Element: 'p', 7
+Element: 'strong', 8
+Text: 'This is my paragraph', 8
+Attribute: 'id', 6
+Attribute: 'x', 6
+Comment: ' my comment ', 9
diff --git a/ext/dom/tests/HTML5/parser/Document_loadHTML_normal_no_error.phpt b/ext/dom/tests/HTML5/parser/Document_loadHTML_normal_no_error.phpt
new file mode 100644
index 0000000000000..cc3fa386170d0
--- /dev/null
+++ b/ext/dom/tests/HTML5/parser/Document_loadHTML_normal_no_error.phpt
@@ -0,0 +1,41 @@
+--TEST--
+DOM\HTML5Document::loadHTML() - normal document, no error
+--EXTENSIONS--
+dom
+--FILE--
+
+
+
+
+ foo
+
+
+
+ bar
+
+
+HTML;
+$dom->loadHTML($html);
+echo $dom->saveHTML(), "\n";
+
+?>
+--EXPECT--
+
+
+ foo
+
+
+
+ bar
+
+
diff --git a/ext/dom/tests/HTML5/parser/Document_loadHTML_old_dtd.phpt b/ext/dom/tests/HTML5/parser/Document_loadHTML_old_dtd.phpt
new file mode 100644
index 0000000000000..210ed3bfc32d3
--- /dev/null
+++ b/ext/dom/tests/HTML5/parser/Document_loadHTML_old_dtd.phpt
@@ -0,0 +1,40 @@
+--TEST--
+HTML5Document::loadHTML(): Old DTD
+--EXTENSIONS--
+dom
+--FILE--
+loadHTML(<<
+
+
+
+
+
+
+HTML);
+
+echo "--- HTML serialization ---\n";
+echo $dom->saveHTML(), "\n";
+echo "--- XML serialization ---\n";
+echo $dom->saveXML();
+
+?>
+--EXPECTF--
+Warning: DOM\HTML5Document::loadHTML(): tree error bad-doctype-token-in-initial-mode in Entity, line: 1, column: 3-9 in %s on line %d
+--- HTML serialization ---
+
+
+
+
+
+--- XML serialization ---
+
+
+
+
+
+
+
diff --git a/ext/dom/tests/HTML5/parser/Document_loadHTML_parser_warning_01.phpt b/ext/dom/tests/HTML5/parser/Document_loadHTML_parser_warning_01.phpt
new file mode 100644
index 0000000000000..7f7bef7ee3e97
--- /dev/null
+++ b/ext/dom/tests/HTML5/parser/Document_loadHTML_parser_warning_01.phpt
@@ -0,0 +1,24 @@
+--TEST--
+DOM\HTML5Document::loadHTML() - parser warning 01
+--EXTENSIONS--
+dom
+--FILE--
+loadHTML($html);
+echo $dom->saveHTML(), "\n";
+
+?>
+--EXPECTF--
+Warning: DOM\HTML5Document::loadHTML(): tokenizer error missing-end-tag-name in Entity, line: 7, column: 11 in %s on line %d
+
+Warning: DOM\HTML5Document::loadHTML(): tree error unexpected-token-in-initial-mode in Entity, line: 1, column: 2-6 in %s on line %d
+foo
+
+
+
+
+
+error
diff --git a/ext/dom/tests/HTML5/parser/Document_loadHTML_parser_warning_02.phpt b/ext/dom/tests/HTML5/parser/Document_loadHTML_parser_warning_02.phpt
new file mode 100644
index 0000000000000..85639e249f901
--- /dev/null
+++ b/ext/dom/tests/HTML5/parser/Document_loadHTML_parser_warning_02.phpt
@@ -0,0 +1,33 @@
+--TEST--
+DOM\HTML5Document::loadHTML() - parser warning 02
+--EXTENSIONS--
+dom
+--FILE--
+loadHTML($html);
+echo $dom->saveHTML(), "\n";
+
+?>
+--EXPECTF--
+Warning: DOM\HTML5Document::loadHTML(): tokenizer error unexpected-null-character in Entity, line: 4, column: 11 in %s on line %d
+
+Warning: DOM\HTML5Document::loadHTML(): tokenizer error missing-whitespace-between-attributes in Entity, line: 5, column: 20 in %s on line %d
+
+Warning: DOM\HTML5Document::loadHTML(): tokenizer error incorrectly-opened-comment in Entity, line: 6, column: 11 in %s on line %d
+
+Warning: DOM\HTML5Document::loadHTML(): tokenizer error nested-comment in Entity, line: 7, column: 18 in %s on line %d
+
+Warning: DOM\HTML5Document::loadHTML(): tree error unexpected-closed-token in Entity, line: 4, column: 18 in %s on line %d
+
+Warning: DOM\HTML5Document::loadHTML(): tree error doctype-token-in-body-mode in Entity, line: 8, column: 11-17 in %s on line %d
+
+ foo
+
+
+ -->
+
+
+
diff --git a/ext/dom/tests/HTML5/parser/Document_loadHTML_parser_warning_03.phpt b/ext/dom/tests/HTML5/parser/Document_loadHTML_parser_warning_03.phpt
new file mode 100644
index 0000000000000..12f5d810c9af8
--- /dev/null
+++ b/ext/dom/tests/HTML5/parser/Document_loadHTML_parser_warning_03.phpt
@@ -0,0 +1,18 @@
+--TEST--
+DOM\HTML5Document::loadHTML() - parser warning 03
+--EXTENSIONS--
+dom
+--FILE--
+loadHTML($html, LIBXML_NOERROR);
+echo $dom->saveHTML(), "\n";
+
+?>
+--EXPECT--
+
+
+
+
diff --git a/ext/dom/tests/HTML5/parser/Document_loadHTML_parser_warning_internal_error.phpt b/ext/dom/tests/HTML5/parser/Document_loadHTML_parser_warning_internal_error.phpt
new file mode 100644
index 0000000000000..006ee453a3b73
--- /dev/null
+++ b/ext/dom/tests/HTML5/parser/Document_loadHTML_parser_warning_internal_error.phpt
@@ -0,0 +1,31 @@
+--TEST--
+DOM\HTML5Document::loadHTML() - parser warning via internal error
+--EXTENSIONS--
+dom
+--FILE--
+x> ';
+$dom->loadHTML($html);
+foreach (libxml_get_errors() as $error) {
+ var_dump($error->message, $error->line, $error->column);
+}
+
+?>
+--EXPECT--
+string(81) "tokenizer error invalid-first-character-of-tag-name in Entity, line: 1, column: 2"
+int(1)
+int(2)
+string(66) "tokenizer error missing-end-tag-name in Entity, line: 1, column: 6"
+int(1)
+int(6)
+string(75) "tree error unexpected-token-in-initial-mode in Entity, line: 1, column: 1-7"
+int(1)
+int(1)
+string(71) "tree error doctype-token-in-body-mode in Entity, line: 1, column: 10-16"
+int(1)
+int(10)
diff --git a/ext/dom/tests/HTML5/parser/Document_loadHTML_without_body.phpt b/ext/dom/tests/HTML5/parser/Document_loadHTML_without_body.phpt
new file mode 100644
index 0000000000000..1858481d4b982
--- /dev/null
+++ b/ext/dom/tests/HTML5/parser/Document_loadHTML_without_body.phpt
@@ -0,0 +1,16 @@
+--TEST--
+DOM\HTML5Document::loadHTML() - document without body
+--EXTENSIONS--
+dom
+--FILE--
+foo '
';
+$dom->loadHTML($html);
+echo $dom->saveHTML(), "\n";
+
+?>
+--EXPECT--
+foo '
diff --git a/ext/dom/tests/HTML5/parser/Document_load_options.phpt b/ext/dom/tests/HTML5/parser/Document_load_options.phpt
new file mode 100644
index 0000000000000..933637cf7a975
--- /dev/null
+++ b/ext/dom/tests/HTML5/parser/Document_load_options.phpt
@@ -0,0 +1,109 @@
+--TEST--
+HTML5Document: loading $options check
+--EXTENSIONS--
+dom
+--FILE--
+{$method}("x", $options);
+ } catch (ValueError $e) {
+ echo $e->getMessage(), "\n";
+ }
+ }
+}
+
+?>
+--EXPECTF--
+--- Method loadHTML ---
+int(%d)
+DOM\HTML5Document::loadHTML(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
+int(4194304)
+DOM\HTML5Document::loadHTML(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
+int(524288)
+DOM\HTML5Document::loadHTML(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
+int(8)
+DOM\HTML5Document::loadHTML(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
+int(4)
+DOM\HTML5Document::loadHTML(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
+int(16)
+DOM\HTML5Document::loadHTML(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
+int(4)
+DOM\HTML5Document::loadHTML(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
+int(256)
+DOM\HTML5Document::loadHTML(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
+int(16384)
+DOM\HTML5Document::loadHTML(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
+int(4)
+DOM\HTML5Document::loadHTML(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
+int(2)
+DOM\HTML5Document::loadHTML(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
+int(1024)
+DOM\HTML5Document::loadHTML(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
+int(1)
+DOM\HTML5Document::loadHTML(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
+int(2048)
+DOM\HTML5Document::loadHTML(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
+int(64)
+DOM\HTML5Document::loadHTML(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
+int(128)
+DOM\HTML5Document::loadHTML(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
+--- Method loadHTMLFile ---
+int(%d)
+DOM\HTML5Document::loadHTMLFile(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
+int(4194304)
+DOM\HTML5Document::loadHTMLFile(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
+int(524288)
+DOM\HTML5Document::loadHTMLFile(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
+int(8)
+DOM\HTML5Document::loadHTMLFile(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
+int(4)
+DOM\HTML5Document::loadHTMLFile(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
+int(16)
+DOM\HTML5Document::loadHTMLFile(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
+int(4)
+DOM\HTML5Document::loadHTMLFile(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
+int(256)
+DOM\HTML5Document::loadHTMLFile(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
+int(16384)
+DOM\HTML5Document::loadHTMLFile(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
+int(4)
+DOM\HTML5Document::loadHTMLFile(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
+int(2)
+DOM\HTML5Document::loadHTMLFile(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
+int(1024)
+DOM\HTML5Document::loadHTMLFile(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
+int(1)
+DOM\HTML5Document::loadHTMLFile(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
+int(2048)
+DOM\HTML5Document::loadHTMLFile(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
+int(64)
+DOM\HTML5Document::loadHTMLFile(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
+int(128)
+DOM\HTML5Document::loadHTMLFile(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
diff --git a/ext/dom/tests/HTML5/parser/paragraph.html b/ext/dom/tests/HTML5/parser/paragraph.html
new file mode 100644
index 0000000000000..998ea4094d496
--- /dev/null
+++ b/ext/dom/tests/HTML5/parser/paragraph.html
@@ -0,0 +1 @@
+foo
\ No newline at end of file
diff --git a/ext/dom/tests/HTML5/parser/parser_warning_01.html b/ext/dom/tests/HTML5/parser/parser_warning_01.html
new file mode 100644
index 0000000000000..cc16fe40eb145
--- /dev/null
+++ b/ext/dom/tests/HTML5/parser/parser_warning_01.html
@@ -0,0 +1,7 @@
+foo
+
+
+
+
+
+error>
\ No newline at end of file
diff --git a/ext/dom/tests/HTML5/parser/parser_warning_02.html b/ext/dom/tests/HTML5/parser/parser_warning_02.html
new file mode 100644
index 0000000000000000000000000000000000000000..cb448e3f35d7fb3387e503f5ea0c3ba41154c7bd
GIT binary patch
literal 191
zcmcCfbn$l%i41U6@CfnsvE#DID9O#S<5B
+
+
+ >
+
+
\ No newline at end of file
diff --git a/ext/dom/tests/HTML5/parser/predefined_namespaces.phpt b/ext/dom/tests/HTML5/parser/predefined_namespaces.phpt
new file mode 100644
index 0000000000000..927bf6a330939
--- /dev/null
+++ b/ext/dom/tests/HTML5/parser/predefined_namespaces.phpt
@@ -0,0 +1,101 @@
+--TEST--
+HTML5Document: Predefined namespaces
+--EXTENSIONS--
+dom
+--FILE--
+loadHTML(<<
+
+
+ Test
+
+
+
+
+
+
+
+
+
+
+
+
+
+HTML);
+
+echo "--- Namespaces ---\n";
+$xpath = new DOMXPath($dom);
+foreach ($xpath->query("//*[name()='body']//*") as $node) {
+ echo $node->nodeName, " ", $node->namespaceURI ?? "(NONE)", "\n";
+ foreach ($node->attributes as $attribute) {
+ echo " Attribute: ", $attribute->nodeName, " ", $attribute->namespaceURI ?? "(NONE)", "\n";
+ }
+}
+
+echo "--- HTML serialization ---\n";
+echo $dom->saveHTML(), "\n";
+echo "--- XML serialization ---\n";
+echo $dom->saveXML();
+
+?>
+--EXPECT--
+--- Namespaces ---
+svg http://www.w3.org/2000/svg
+ Attribute: width (NONE)
+ Attribute: height (NONE)
+ Attribute: viewbox (NONE)
+rect http://www.w3.org/2000/svg
+ Attribute: id (NONE)
+ Attribute: x (NONE)
+ Attribute: y (NONE)
+ Attribute: width (NONE)
+ Attribute: height (NONE)
+div http://www.w3.org/1999/xhtml
+p http://www.w3.org/1999/xhtml
+math http://www.w3.org/1998/Math/MathML
+mtable http://www.w3.org/1998/Math/MathML
+ Attribute: id (NONE)
+svg http://www.w3.org/1998/Math/MathML
+--- HTML serialization ---
+
+ Test
+
+
+
+
+
+
+
+
+
+
+
+
+
+--- XML serialization ---
+
+
+
+ Test
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/ext/dom/tests/HTML5/serializer/Document_escape_attribute.phpt b/ext/dom/tests/HTML5/serializer/Document_escape_attribute.phpt
new file mode 100644
index 0000000000000..17ba3e912411a
--- /dev/null
+++ b/ext/dom/tests/HTML5/serializer/Document_escape_attribute.phpt
@@ -0,0 +1,16 @@
+--TEST--
+DOM\HTML5Document serialization escape attribute
+--EXTENSIONS--
+dom
+--FILE--
+loadHTML("
", LIBXML_NOERROR);
+$p = $dom->documentElement->firstChild->nextSibling->firstChild;
+$p->setAttribute("foo", "\"'&");
+echo $dom->saveHTML();
+
+?>
+--EXPECT--
+
diff --git a/ext/dom/tests/HTML5/serializer/Document_escape_nbsp.phpt b/ext/dom/tests/HTML5/serializer/Document_escape_nbsp.phpt
new file mode 100644
index 0000000000000..5d9988fdcb81e
--- /dev/null
+++ b/ext/dom/tests/HTML5/serializer/Document_escape_nbsp.phpt
@@ -0,0 +1,14 @@
+--TEST--
+DOM\HTML5Document serialization escape nbsp
+--EXTENSIONS--
+dom
+--FILE--
+loadHTML("these must transform: \xc2\xa0\xc2\xa0 but these not: \xa0|\xc2...
", LIBXML_NOERROR);
+echo $dom->saveHTML();
+
+?>
+--EXPECT--
+these must transform: but these not: �|�...
diff --git a/ext/dom/tests/HTML5/serializer/Document_serialize_attribute_ns.phpt b/ext/dom/tests/HTML5/serializer/Document_serialize_attribute_ns.phpt
new file mode 100644
index 0000000000000..14616aedf01f7
--- /dev/null
+++ b/ext/dom/tests/HTML5/serializer/Document_serialize_attribute_ns.phpt
@@ -0,0 +1,22 @@
+--TEST--
+DOM\HTML5Document serialization of an attribute in a namespace
+--EXTENSIONS--
+dom
+--FILE--
+appendChild($dom->createElement("root"));
+$root->setAttributeNodeNS($dom->createAttributeNS("http://php.net", "x:foo"));
+$root->setAttributeNodeNS($dom->createAttributeNS("http://www.w3.org/XML/1998/namespace", "y:id"));
+// Can't test the following because its behaviour is broken in combination with the live spec
+//$root->setAttributeNodeNS($dom->createAttributeNS("http://www.w3.org/2000/xmlns/", "xmlns"));
+$root->setAttributeNodeNS($dom->createAttributeNS("http://www.w3.org/2000/xmlns/", "xmlns:f"));
+$root->setAttributeNodeNS($dom->createAttributeNS("http://www.w3.org/1999/xlink", "z:f"));
+
+// Note: XML declarations are not emitted in HTML5
+echo $dom->saveHTML();
+
+?>
+--EXPECT--
+
diff --git a/ext/dom/tests/HTML5/serializer/Document_serialize_cdata.phpt b/ext/dom/tests/HTML5/serializer/Document_serialize_cdata.phpt
new file mode 100644
index 0000000000000..df40f919d9b9a
--- /dev/null
+++ b/ext/dom/tests/HTML5/serializer/Document_serialize_cdata.phpt
@@ -0,0 +1,14 @@
+--TEST--
+DOM\HTML5Document serialization of CData
+--EXTENSIONS--
+dom
+--FILE--
+appendChild($dom->createCDATASection("foobaré\"<>-&"));
+echo $dom->saveHTML();
+
+?>
+--EXPECT--
+foobaré"<>-&
diff --git a/ext/dom/tests/HTML5/serializer/Document_serialize_comment.phpt b/ext/dom/tests/HTML5/serializer/Document_serialize_comment.phpt
new file mode 100644
index 0000000000000..ee78dfaeec3c1
--- /dev/null
+++ b/ext/dom/tests/HTML5/serializer/Document_serialize_comment.phpt
@@ -0,0 +1,14 @@
+--TEST--
+DOM\HTML5Document serialization of comment
+--EXTENSIONS--
+dom
+--FILE--
+appendChild($dom->createComment("foobaré\"<>-&"));
+echo $dom->saveHTML();
+
+?>
+--EXPECT--
+
diff --git a/ext/dom/tests/HTML5/serializer/Document_serialize_doctype.phpt b/ext/dom/tests/HTML5/serializer/Document_serialize_doctype.phpt
new file mode 100644
index 0000000000000..6e991bbd79757
--- /dev/null
+++ b/ext/dom/tests/HTML5/serializer/Document_serialize_doctype.phpt
@@ -0,0 +1,40 @@
+--TEST--
+DOM\HTML5Document serialization of document type
+--EXTENSIONS--
+dom
+--FILE--
+loadHTML(<<
+
+
+
+
+
+
+HTML, LIBXML_NOERROR);
+
+echo "--- XML encoding ---\n";
+echo $dom->saveXML();
+echo "--- HTML encoding ---\n";
+// We don't expect to see the public ID and the system ID because the serialization algorithm doesn't serialize those
+echo $dom->saveHTML();
+
+?>
+--EXPECT--
+--- XML encoding ---
+
+
+
+
+
+
+
+--- HTML encoding ---
+
+
+
+
+
diff --git a/ext/dom/tests/HTML5/serializer/Document_serialize_element_ns.phpt b/ext/dom/tests/HTML5/serializer/Document_serialize_element_ns.phpt
new file mode 100644
index 0000000000000..d5fdf386a91a0
--- /dev/null
+++ b/ext/dom/tests/HTML5/serializer/Document_serialize_element_ns.phpt
@@ -0,0 +1,29 @@
+--TEST--
+DOM\HTML5Document serialization of element in a namespace
+--EXTENSIONS--
+dom
+--FILE--
+appendChild($dom->createElement("root"));
+
+$root->append("\n");
+$root->append($dom->createElementNS("http://php.net", "noprefix"), "\n");
+$root->append($dom->createElementNS("http://php.net", "with:prefix"), "\n");
+$root->append($dom->createElementNS("http://www.w3.org/1999/xhtml", "xhtml:br"), "\n");
+$root->append($dom->createElementNS("http://www.w3.org/2000/svg", "s:svg"), "\n");
+$root->append($dom->createElementNS("http://www.w3.org/1998/Math/MathML", "m:math"), "\n");
+
+// Note: XML declarations are not emitted in HTML5
+echo $dom->saveHTML();
+
+?>
+--EXPECT--
+
+
+
+
+
+
+
diff --git a/ext/dom/tests/HTML5/serializer/Document_serialize_failing_stream.phpt b/ext/dom/tests/HTML5/serializer/Document_serialize_failing_stream.phpt
new file mode 100644
index 0000000000000..807bd8c0e3e5e
--- /dev/null
+++ b/ext/dom/tests/HTML5/serializer/Document_serialize_failing_stream.phpt
@@ -0,0 +1,49 @@
+--TEST--
+DOM\HTML5Document serialization with a failing stream
+--EXTENSIONS--
+dom
+--FILE--
+fail) {
+ throw new Error("fail");
+ }
+ $this->fail = true;
+ var_dump($data);
+ return strlen($data);
+ }
+
+ public function stream_eof() {
+ return false;
+ }
+
+ public function stream_close() {
+ return true;
+ }
+}
+
+stream_wrapper_register("failing", "FailingWrapper");
+
+$dom = new DOM\HTML5Document();
+$root = $dom->appendChild($dom->createElement("root"));
+$dom->saveHTMLFile("failing://foo");
+
+?>
+--EXPECTF--
+string(1) "<"
+
+Fatal error: Uncaught Error: fail in %s:%d
+Stack trace:
+#0 [internal function]: FailingWrapper->stream_write('root')
+#1 %s(%d): DOM\HTML5Document->saveHTMLFile('failing://foo')
+#2 {main}
+ thrown in %s on line %d
diff --git a/ext/dom/tests/HTML5/serializer/Document_serialize_fragment.phpt b/ext/dom/tests/HTML5/serializer/Document_serialize_fragment.phpt
new file mode 100644
index 0000000000000..81360ab65dc63
--- /dev/null
+++ b/ext/dom/tests/HTML5/serializer/Document_serialize_fragment.phpt
@@ -0,0 +1,18 @@
+--TEST--
+DOM\HTML5Document serialization of document fragment
+--EXTENSIONS--
+dom
+--FILE--
+createDocumentFragment();
+$fragment->appendChild($dom->createElement("foo"));
+$bar = $fragment->appendChild($dom->createElement("bar"));
+$fragment->appendChild($dom->createElement("baz"));
+$bar->appendChild($dom->createElement("inner"));
+echo $dom->saveHTML($fragment);
+
+?>
+--EXPECT--
+
diff --git a/ext/dom/tests/HTML5/serializer/Document_serialize_full_document.phpt b/ext/dom/tests/HTML5/serializer/Document_serialize_full_document.phpt
new file mode 100644
index 0000000000000..b9f5c973de614
--- /dev/null
+++ b/ext/dom/tests/HTML5/serializer/Document_serialize_full_document.phpt
@@ -0,0 +1,52 @@
+--TEST--
+DOM\HTML5Document serialization of full document
+--EXTENSIONS--
+dom
+--FILE--
+loadHTML(<<
+
+
+ This is my épic title!
+
+
+
+
+
+
+
+
+
+
+
+
+HTML);
+echo $dom->saveHTML();
+
+?>
+--EXPECT--
+
+ This is my épic title!
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/ext/dom/tests/HTML5/serializer/Document_serialize_ns_imported_01.phpt b/ext/dom/tests/HTML5/serializer/Document_serialize_ns_imported_01.phpt
new file mode 100644
index 0000000000000..e55cf7e0bed36
--- /dev/null
+++ b/ext/dom/tests/HTML5/serializer/Document_serialize_ns_imported_01.phpt
@@ -0,0 +1,34 @@
+--TEST--
+Document serialization with an imported namespace node 01
+--EXTENSIONS--
+dom
+--FILE--
+loadXML(' ');
+$xml->documentElement->setAttributeNS("http://foo/", "foo:bar", "value");
+$xml->documentElement->appendChild($xml->createElementNS('some:ns2', 'child'));
+echo $xml->saveXML();
+echo $xml->saveHTML(), "\n";
+
+echo "--- After import into HTML ---\n";
+
+$html = new DOM\HTML5Document();
+$html->loadHTML('foo
', LIBXML_NOERROR);
+
+$p = $html->documentElement->firstChild->nextSibling->firstChild;
+$p->appendChild($html->importNode($xml->documentElement, true));
+
+echo $html->saveXML();
+echo $html->saveHTML(), "\n";
+
+?>
+--EXPECT--
+
+
+
+--- After import into HTML ---
+
+ foo
+foo
diff --git a/ext/dom/tests/HTML5/serializer/Document_serialize_ns_imported_02.phpt b/ext/dom/tests/HTML5/serializer/Document_serialize_ns_imported_02.phpt
new file mode 100644
index 0000000000000..98844f4493d1c
--- /dev/null
+++ b/ext/dom/tests/HTML5/serializer/Document_serialize_ns_imported_02.phpt
@@ -0,0 +1,33 @@
+--TEST--
+Document serialization with an imported namespace node 02
+--EXTENSIONS--
+dom
+--FILE--
+loadXML(' ');
+$xml->documentElement->appendChild($xml->createElementNS('some:ns2', 'child'));
+echo $xml->saveXML();
+echo $xml->saveHTML(), "\n";
+
+echo "--- After import into HTML ---\n";
+
+$html = new DOM\HTML5Document();
+$html->loadHTML('foo
', LIBXML_NOERROR);
+
+$p = $html->documentElement->firstChild->nextSibling->firstChild;
+$p->appendChild($html->importNode($xml->documentElement, true));
+
+echo $html->saveXML();
+echo $html->saveHTML(), "\n";
+
+?>
+--EXPECT--
+
+
+
+--- After import into HTML ---
+
+
foo
+foo
diff --git a/ext/dom/tests/HTML5/serializer/Document_serialize_ns_imported_03.phpt b/ext/dom/tests/HTML5/serializer/Document_serialize_ns_imported_03.phpt
new file mode 100644
index 0000000000000..d0340fbe806e1
--- /dev/null
+++ b/ext/dom/tests/HTML5/serializer/Document_serialize_ns_imported_03.phpt
@@ -0,0 +1,33 @@
+--TEST--
+Document serialization with an imported namespace node 03
+--EXTENSIONS--
+dom
+--FILE--
+loadXML(' ');
+$xml->documentElement->appendChild($xml->createElementNS('some:ns2', 'child'));
+echo $xml->saveXML();
+echo $xml->saveHTML(), "\n";
+
+echo "--- After import into HTML ---\n";
+
+$html = new DOM\HTML5Document();
+$html->loadHTML('foo
', LIBXML_NOERROR);
+
+$p = $html->documentElement->firstChild->nextSibling->firstChild;
+$p->appendChild($html->importNode($xml->documentElement, false));
+
+echo $html->saveXML();
+echo $html->saveHTML(), "\n";
+
+?>
+--EXPECT--
+
+
+
+--- After import into HTML ---
+
+foo
+foo
diff --git a/ext/dom/tests/HTML5/serializer/Document_serialize_ns_imported_04.phpt b/ext/dom/tests/HTML5/serializer/Document_serialize_ns_imported_04.phpt
new file mode 100644
index 0000000000000..4905e5daa275d
--- /dev/null
+++ b/ext/dom/tests/HTML5/serializer/Document_serialize_ns_imported_04.phpt
@@ -0,0 +1,33 @@
+--TEST--
+Document serialization with an imported namespace node 04
+--EXTENSIONS--
+dom
+--FILE--
+loadXML(' ');
+$xml->documentElement->appendChild($xml->createElementNS('some:ns2', 'child'));
+echo $xml->saveXML();
+echo $xml->saveHTML(), "\n";
+
+echo "--- After import into HTML ---\n";
+
+$html = new DOM\HTML5Document();
+$html->loadHTML('foo
', LIBXML_NOERROR);
+
+$p = $html->documentElement->firstChild->nextSibling->firstChild;
+$p->appendChild($html->importNode($xml->documentElement, false));
+
+echo $html->saveXML();
+echo $html->saveHTML(), "\n";
+
+?>
+--EXPECT--
+
+
+
+--- After import into HTML ---
+
+foo
+foo
diff --git a/ext/dom/tests/HTML5/serializer/Document_serialize_ns_imported_05.phpt b/ext/dom/tests/HTML5/serializer/Document_serialize_ns_imported_05.phpt
new file mode 100644
index 0000000000000..7c1f72f867b31
--- /dev/null
+++ b/ext/dom/tests/HTML5/serializer/Document_serialize_ns_imported_05.phpt
@@ -0,0 +1,33 @@
+--TEST--
+Document serialization with an imported namespace node 05
+--EXTENSIONS--
+dom
+--FILE--
+loadXML(' ');
+$xml->documentElement->appendChild($xml->createElementNS('some:ns2', 'child'));
+echo $xml->saveXML();
+echo $xml->saveHTML(), "\n";
+
+echo "--- After adoption into HTML ---\n";
+
+$html = new DOM\HTML5Document();
+$html->loadHTML('foo
', LIBXML_NOERROR);
+
+$p = $html->documentElement->firstChild->nextSibling->firstChild;
+$p->appendChild($html->adoptNode($xml->documentElement));
+
+echo $html->saveXML();
+echo $html->saveHTML(), "\n";
+
+?>
+--EXPECT--
+
+
+
+--- After adoption into HTML ---
+
+foo
+foo
diff --git a/ext/dom/tests/HTML5/serializer/Document_serialize_ns_imported_06.phpt b/ext/dom/tests/HTML5/serializer/Document_serialize_ns_imported_06.phpt
new file mode 100644
index 0000000000000..f67ecbb082b75
--- /dev/null
+++ b/ext/dom/tests/HTML5/serializer/Document_serialize_ns_imported_06.phpt
@@ -0,0 +1,33 @@
+--TEST--
+Document serialization with an imported namespace node 06
+--EXTENSIONS--
+dom
+--FILE--
+loadXML(' ');
+$xml->documentElement->firstChild->appendChild($xml->createElementNS('some:ns2', 'child'));
+echo $xml->saveXML();
+echo $xml->saveHTML(), "\n";
+
+echo "--- After clone + import into HTML ---\n";
+
+$html = new DOM\HTML5Document();
+$html->loadHTML('foo
', LIBXML_NOERROR);
+
+$p = $html->documentElement->firstChild->nextSibling->firstChild;
+$p->appendChild($html->adoptNode($xml->documentElement->firstChild->cloneNode(true)));
+
+echo $html->saveXML();
+echo $html->saveHTML(), "\n";
+
+?>
+--EXPECT--
+
+
+
+--- After clone + import into HTML ---
+
+foo
+foo
diff --git a/ext/dom/tests/HTML5/serializer/Document_serialize_processing_instruction.phpt b/ext/dom/tests/HTML5/serializer/Document_serialize_processing_instruction.phpt
new file mode 100644
index 0000000000000..3d34acd403e7a
--- /dev/null
+++ b/ext/dom/tests/HTML5/serializer/Document_serialize_processing_instruction.phpt
@@ -0,0 +1,15 @@
+--TEST--
+DOM\HTML5Document serialization of processing instruction
+--EXTENSIONS--
+dom
+--FILE--
+ in a processing instruction element but that breaks (as expected)
+$dom->appendChild($dom->createProcessingInstruction("target", "foobaré\"&<\xc2\xa0"));
+echo $dom->saveHTML();
+
+?>
+--EXPECT--
+
diff --git a/ext/dom/tests/HTML5/serializer/Document_serialize_roots_test_empty.phpt b/ext/dom/tests/HTML5/serializer/Document_serialize_roots_test_empty.phpt
new file mode 100644
index 0000000000000..d1143a264fe7a
--- /dev/null
+++ b/ext/dom/tests/HTML5/serializer/Document_serialize_roots_test_empty.phpt
@@ -0,0 +1,30 @@
+--TEST--
+DOM\HTML5Document serialization of different roots resulting in an empty result
+--EXTENSIONS--
+dom
+--FILE--
+appendChild($dom->createComment("comment"));
+$cdata = $dom->appendChild($dom->createCDATASection("cdata"));
+$emptyElement = $dom->appendChild($dom->createElement("empty"));
+$text = $dom->appendChild($dom->createTextNode("text"));
+$pi = $dom->appendChild($dom->createProcessingInstruction("target", "data"));
+$fragment = $dom->createDocumentFragment();
+
+var_dump($dom->saveHTML($comment));
+var_dump($dom->saveHTML($cdata));
+var_dump($dom->saveHTML($emptyElement));
+var_dump($dom->saveHTML($text));
+var_dump($dom->saveHTML($pi));
+var_dump($dom->saveHTML($fragment));
+
+?>
+--EXPECT--
+string(0) ""
+string(0) ""
+string(0) ""
+string(0) ""
+string(0) ""
+string(0) ""
diff --git a/ext/dom/tests/HTML5/serializer/Document_serialize_text_01.phpt b/ext/dom/tests/HTML5/serializer/Document_serialize_text_01.phpt
new file mode 100644
index 0000000000000..6f1b0c1c98f7d
--- /dev/null
+++ b/ext/dom/tests/HTML5/serializer/Document_serialize_text_01.phpt
@@ -0,0 +1,16 @@
+--TEST--
+DOM\HTML5Document serialization escape text 01
+--EXTENSIONS--
+dom
+--FILE--
+loadHTML("
", LIBXML_NOERROR);
+$p = $dom->documentElement->firstChild->nextSibling->firstChild;
+$p->textContent = "this is &text! \"\"";
+echo $dom->saveHTML();
+
+?>
+--EXPECT--
+this is <some> &text! ""
diff --git a/ext/dom/tests/HTML5/serializer/Document_serialize_text_02.phpt b/ext/dom/tests/HTML5/serializer/Document_serialize_text_02.phpt
new file mode 100644
index 0000000000000..5c9a31a12bf63
--- /dev/null
+++ b/ext/dom/tests/HTML5/serializer/Document_serialize_text_02.phpt
@@ -0,0 +1,27 @@
+--TEST--
+DOM\HTML5Document serialization escape text 02
+--EXTENSIONS--
+dom
+--FILE--
+appendChild($dom->createElement("body"));
+foreach (["style", "script", "xmp", "iframe", "noembed", "noframes", "plaintext", "noscript"] as $tag) {
+ $tag = $body->appendChild($dom->createElement($tag));
+ $tag->textContent = "&\"<>\xc2\xa0 foobar";
+ $body->appendChild(new DOMText("\n"));
+}
+echo $dom->saveHTML();
+
+?>
+--EXPECT--
+
+
+&"<> foobar
+
+&"<> foobar
+&"<> foobar
+&"<> foobar
+&"<> foobar
+
diff --git a/ext/dom/tests/HTML5/serializer/Document_serialize_text_03.phpt b/ext/dom/tests/HTML5/serializer/Document_serialize_text_03.phpt
new file mode 100644
index 0000000000000..5da3187652e05
--- /dev/null
+++ b/ext/dom/tests/HTML5/serializer/Document_serialize_text_03.phpt
@@ -0,0 +1,27 @@
+--TEST--
+DOM\HTML5Document serialization escape text 03
+--EXTENSIONS--
+dom
+--FILE--
+appendChild($dom->createElement("body"));
+foreach (["style", "script", "xmp", "iframe", "noembed", "noframes", "plaintext", "noscript"] as $tag) {
+ $tag = $body->appendChild($dom->createElementNS("some:ns", $tag));
+ $tag->textContent = "&\"<>\xc2\xa0 foobar";
+ $body->appendChild(new DOMText("\n"));
+}
+echo $dom->saveHTML();
+
+?>
+--EXPECT--
+
+
+&"<> foobar
+
+&"<> foobar
+&"<> foobar
+&"<> foobar
+&"<> foobar
+
diff --git a/ext/dom/tests/HTML5/serializer/Document_serialize_void_elements.phpt b/ext/dom/tests/HTML5/serializer/Document_serialize_void_elements.phpt
new file mode 100644
index 0000000000000..b75940da5c2f3
--- /dev/null
+++ b/ext/dom/tests/HTML5/serializer/Document_serialize_void_elements.phpt
@@ -0,0 +1,100 @@
+--TEST--
+DOM\HTML5Document serialization of void elements
+--EXTENSIONS--
+dom
+--FILE--
+appendChild($dom->createElement($tag));
+ $element->appendChild($dom->createElement("inner"));
+ $element->after("\n");
+ echo "$tag: ";
+ var_dump(strlen($dom->saveHTML($element)) === 0);
+
+ $element = $dom->appendChild($dom->createElementNS("http://php.net/foo", "x:$tag"));
+ $element->appendChild($dom->createElement("inner"));
+ $element->after("\n");
+}
+
+echo $dom->saveHTML();
+
+?>
+--EXPECT--
+area: bool(true)
+base: bool(true)
+br: bool(true)
+col: bool(true)
+embed: bool(true)
+hr: bool(true)
+img: bool(true)
+input: bool(true)
+link: bool(true)
+meta: bool(true)
+source: bool(true)
+track: bool(true)
+wbr: bool(true)
+basefont: bool(true)
+bgsound: bool(true)
+frame: bool(true)
+keygen: bool(true)
+param: bool(true)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/ext/dom/xpath.c b/ext/dom/xpath.c
index 0ef0fdfa8c094..8d0f4a0b7b4b5 100644
--- a/ext/dom/xpath.c
+++ b/ext/dom/xpath.c
@@ -383,7 +383,6 @@ static void php_xpath_eval(INTERNAL_FUNCTION_PARAMETERS, int type) /* {{{ */
}
}
-
ctxp->namespaces = ns;
ctxp->nsNr = nsnbr;
From c166391273a52db479cac73fcbedfcfd9cf6f99f Mon Sep 17 00:00:00 2001
From: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
Date: Sun, 17 Sep 2023 14:24:23 +0200
Subject: [PATCH 05/53] Create aliases for DOM constants
---
ext/dom/php_dom.stub.php | 86 +++++++++++++++++++++++++++++++++++++++
ext/dom/php_dom_arginfo.h | 19 ++++++++-
2 files changed, 104 insertions(+), 1 deletion(-)
diff --git a/ext/dom/php_dom.stub.php b/ext/dom/php_dom.stub.php
index 291d1a747291a..2f4600bbbde3e 100644
--- a/ext/dom/php_dom.stub.php
+++ b/ext/dom/php_dom.stub.php
@@ -1016,6 +1016,92 @@ function dom_import_simplexml(object $node): DOMElement {}
namespace DOM
{
+ /**
+ * @var int
+ * @cvalue PHP_ERR
+ */
+ const PHP_ERR = UNKNOWN;
+ /**
+ * @var int
+ * @cvalue INDEX_SIZE_ERR
+ */
+ const INDEX_SIZE_ERR = UNKNOWN;
+ /**
+ * @var int
+ * @cvalue DOMSTRING_SIZE_ERR
+ */
+ const STRING_SIZE_ERR = UNKNOWN;
+ /**
+ * @var int
+ * @cvalue HIERARCHY_REQUEST_ERR
+ */
+ const HIERARCHY_REQUEST_ERR = UNKNOWN;
+ /**
+ * @var int
+ * @cvalue WRONG_DOCUMENT_ERR
+ */
+ const WRONG_DOCUMENT_ERR = UNKNOWN;
+ /**
+ * @var int
+ * @cvalue INVALID_CHARACTER_ERR
+ */
+ const INVALID_CHARACTER_ERR = UNKNOWN;
+ /**
+ * @var int
+ * @cvalue NO_DATA_ALLOWED_ERR
+ */
+ const NO_DATA_ALLOWED_ERR = UNKNOWN;
+ /**
+ * @var int
+ * @cvalue NO_MODIFICATION_ALLOWED_ERR
+ */
+ const NO_MODIFICATION_ALLOWED_ERR = UNKNOWN;
+ /**
+ * @var int
+ * @cvalue NOT_FOUND_ERR
+ */
+ const NOT_FOUND_ERR = UNKNOWN;
+ /**
+ * @var int
+ * @cvalue NOT_SUPPORTED_ERR
+ */
+ const NOT_SUPPORTED_ERR = UNKNOWN;
+ /**
+ * @var int
+ * @cvalue INUSE_ATTRIBUTE_ERR
+ */
+ const INUSE_ATTRIBUTE_ERR = UNKNOWN;
+ /**
+ * @var int
+ * @cvalue INVALID_STATE_ERR
+ */
+ const INVALID_STATE_ERR = UNKNOWN;
+ /**
+ * @var int
+ * @cvalue SYNTAX_ERR
+ */
+ const SYNTAX_ERR = UNKNOWN;
+ /**
+ * @var int
+ * @cvalue INVALID_MODIFICATION_ERR
+ */
+ const INVALID_MODIFICATION_ERR = UNKNOWN;
+ /**
+ * @var int
+ * @cvalue NAMESPACE_ERR
+ */
+ const NAMESPACE_ERR = UNKNOWN;
+ /**
+ * @var int
+ * @cvalue INVALID_ACCESS_ERR
+ */
+ const INVALID_ACCESS_ERR = UNKNOWN;
+ /**
+ * @var int
+ * @cvalue VALIDATION_ERR
+ */
+ const VALIDATION_ERR = UNKNOWN;
+
/**
* @var int
* @cvalue DOM_HTML_NO_DEFAULT_NS
diff --git a/ext/dom/php_dom_arginfo.h b/ext/dom/php_dom_arginfo.h
index cffaa09deb8a1..04c71d6bd35b8 100644
--- a/ext/dom/php_dom_arginfo.h
+++ b/ext/dom/php_dom_arginfo.h
@@ -1,5 +1,5 @@
/* This is a generated file, edit the .stub.php file instead.
- * Stub hash: 4d2c0e49b4a7a9d6f99669a2d8f8bf8b646b80c9 */
+ * Stub hash: 73e3e87a39619d70aa94ac3d0f8a1dcf35613b70 */
ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(arginfo_dom_import_simplexml, 0, 1, DOMElement, 0)
ZEND_ARG_TYPE_INFO(0, node, IS_OBJECT, 0)
@@ -1057,6 +1057,23 @@ static void register_php_dom_symbols(int module_number)
REGISTER_LONG_CONSTANT("DOM_NAMESPACE_ERR", NAMESPACE_ERR, CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("DOM_INVALID_ACCESS_ERR", INVALID_ACCESS_ERR, CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("DOM_VALIDATION_ERR", VALIDATION_ERR, CONST_PERSISTENT);
+ REGISTER_LONG_CONSTANT("DOM\\PHP_ERR", PHP_ERR, CONST_PERSISTENT);
+ REGISTER_LONG_CONSTANT("DOM\\INDEX_SIZE_ERR", INDEX_SIZE_ERR, CONST_PERSISTENT);
+ REGISTER_LONG_CONSTANT("DOM\\STRING_SIZE_ERR", DOMSTRING_SIZE_ERR, CONST_PERSISTENT);
+ REGISTER_LONG_CONSTANT("DOM\\HIERARCHY_REQUEST_ERR", HIERARCHY_REQUEST_ERR, CONST_PERSISTENT);
+ REGISTER_LONG_CONSTANT("DOM\\WRONG_DOCUMENT_ERR", WRONG_DOCUMENT_ERR, CONST_PERSISTENT);
+ REGISTER_LONG_CONSTANT("DOM\\INVALID_CHARACTER_ERR", INVALID_CHARACTER_ERR, CONST_PERSISTENT);
+ REGISTER_LONG_CONSTANT("DOM\\NO_DATA_ALLOWED_ERR", NO_DATA_ALLOWED_ERR, CONST_PERSISTENT);
+ REGISTER_LONG_CONSTANT("DOM\\NO_MODIFICATION_ALLOWED_ERR", NO_MODIFICATION_ALLOWED_ERR, CONST_PERSISTENT);
+ REGISTER_LONG_CONSTANT("DOM\\NOT_FOUND_ERR", NOT_FOUND_ERR, CONST_PERSISTENT);
+ REGISTER_LONG_CONSTANT("DOM\\NOT_SUPPORTED_ERR", NOT_SUPPORTED_ERR, CONST_PERSISTENT);
+ REGISTER_LONG_CONSTANT("DOM\\INUSE_ATTRIBUTE_ERR", INUSE_ATTRIBUTE_ERR, CONST_PERSISTENT);
+ REGISTER_LONG_CONSTANT("DOM\\INVALID_STATE_ERR", INVALID_STATE_ERR, CONST_PERSISTENT);
+ REGISTER_LONG_CONSTANT("DOM\\SYNTAX_ERR", SYNTAX_ERR, CONST_PERSISTENT);
+ REGISTER_LONG_CONSTANT("DOM\\INVALID_MODIFICATION_ERR", INVALID_MODIFICATION_ERR, CONST_PERSISTENT);
+ REGISTER_LONG_CONSTANT("DOM\\NAMESPACE_ERR", NAMESPACE_ERR, CONST_PERSISTENT);
+ REGISTER_LONG_CONSTANT("DOM\\INVALID_ACCESS_ERR", INVALID_ACCESS_ERR, CONST_PERSISTENT);
+ REGISTER_LONG_CONSTANT("DOM\\VALIDATION_ERR", VALIDATION_ERR, CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("DOM\\HTML_NO_DEFAULT_NS", DOM_HTML_NO_DEFAULT_NS, CONST_PERSISTENT);
}
From a61ff75e7aeeab0c5731481df1d341206ea4e24d Mon Sep 17 00:00:00 2001
From: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
Date: Sun, 17 Sep 2023 14:32:43 +0200
Subject: [PATCH 06/53] Alias dom_import_simplexml
---
ext/dom/php_dom.stub.php | 3 +++
ext/dom/php_dom_arginfo.h | 7 ++++++-
2 files changed, 9 insertions(+), 1 deletion(-)
diff --git a/ext/dom/php_dom.stub.php b/ext/dom/php_dom.stub.php
index 2f4600bbbde3e..94eb7e62295f1 100644
--- a/ext/dom/php_dom.stub.php
+++ b/ext/dom/php_dom.stub.php
@@ -1124,4 +1124,7 @@ public function saveHTML(?\DOMNode $node = null): string|false {}
public function saveHTMLFile(string $filename): int|false {}
}
+
+ /** @implementation-alias dom_import_simplexml */
+ function import_simplexml(object $node): DOMElement {}
}
diff --git a/ext/dom/php_dom_arginfo.h b/ext/dom/php_dom_arginfo.h
index 04c71d6bd35b8..e182ca3781511 100644
--- a/ext/dom/php_dom_arginfo.h
+++ b/ext/dom/php_dom_arginfo.h
@@ -1,10 +1,14 @@
/* This is a generated file, edit the .stub.php file instead.
- * Stub hash: 73e3e87a39619d70aa94ac3d0f8a1dcf35613b70 */
+ * Stub hash: ffe5e718fe6449a3c93c62feb12bc9e3c57e340d */
ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(arginfo_dom_import_simplexml, 0, 1, DOMElement, 0)
ZEND_ARG_TYPE_INFO(0, node, IS_OBJECT, 0)
ZEND_END_ARG_INFO()
+ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(arginfo_DOM_import_simplexml, 0, 1, DOM\\DOMElement, 0)
+ ZEND_ARG_TYPE_INFO(0, node, IS_OBJECT, 0)
+ZEND_END_ARG_INFO()
+
ZEND_BEGIN_ARG_INFO_EX(arginfo_class_DOMCdataSection___construct, 0, 0, 1)
ZEND_ARG_TYPE_INFO(0, data, IS_STRING, 0)
ZEND_END_ARG_INFO()
@@ -726,6 +730,7 @@ ZEND_METHOD(DOM_HTML5Document, saveHTMLFile);
static const zend_function_entry ext_functions[] = {
ZEND_FE(dom_import_simplexml, arginfo_dom_import_simplexml)
+ ZEND_NS_FALIAS("DOM", import_simplexml, dom_import_simplexml, arginfo_DOM_import_simplexml)
ZEND_FE_END
};
From 13012dd70efa0aaa92b1dae8789bdfee1c45dcb9 Mon Sep 17 00:00:00 2001
From: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
Date: Sun, 17 Sep 2023 14:52:50 +0200
Subject: [PATCH 07/53] Create class aliases
---
ext/dom/php_dom.stub.php | 25 ++++++++++++++++++++++++-
ext/dom/php_dom_arginfo.h | 33 +++++++++++++++++++++++++++++++++
2 files changed, 57 insertions(+), 1 deletion(-)
diff --git a/ext/dom/php_dom.stub.php b/ext/dom/php_dom.stub.php
index 94eb7e62295f1..54c702f068bd0 100644
--- a/ext/dom/php_dom.stub.php
+++ b/ext/dom/php_dom.stub.php
@@ -239,6 +239,7 @@
*/
const DOM_VALIDATION_ERR = UNKNOWN;
+ /** @alias DOM\DocumentType */
class DOMDocumentType extends DOMNode
{
/** @readonly */
@@ -260,16 +261,19 @@ class DOMDocumentType extends DOMNode
public ?string $internalSubset;
}
+ /** @alias DOM\CDATASection */
class DOMCdataSection extends DOMText
{
public function __construct(string $data) {}
}
+ /** @alias DOM\Comment */
class DOMComment extends DOMCharacterData
{
public function __construct(string $data = "") {}
}
+ /** @alias DOM\ParentNode */
interface DOMParentNode
{
/** @param DOMNode|string $nodes */
@@ -282,6 +286,7 @@ public function prepend(...$nodes): void;
public function replaceChildren(...$nodes): void;
}
+ /** @alias DOM\ChildNode */
interface DOMChildNode
{
public function remove(): void;
@@ -296,6 +301,7 @@ public function after(...$nodes): void;
public function replaceWith(...$nodes): void;
}
+ /** @alias DOM\Node */
class DOMNode
{
public const int DOCUMENT_POSITION_DISCONNECTED = 0x01;
@@ -420,6 +426,7 @@ public function __sleep(): array {}
public function __wakeup(): void {}
}
+ /** @alias DOM\NameSpaceNode */
class DOMNameSpaceNode
{
/** @readonly */
@@ -459,6 +466,7 @@ public function __sleep(): array {}
public function __wakeup(): void {}
}
+ /** @alias DOM\Implementation */
class DOMImplementation
{
/** @tentative-return-type */
@@ -474,6 +482,7 @@ public function createDocumentType(string $qualifiedName, string $publicId = "",
public function createDocument(?string $namespace = null, string $qualifiedName = "", ?DOMDocumentType $doctype = null) {}
}
+ /** @alias DOM\DocumentFragment */
class DOMDocumentFragment extends DOMNode implements DOMParentNode
{
/** @readonly */
@@ -509,6 +518,7 @@ public function prepend(...$nodes): void {}
public function replaceChildren(...$nodes): void {}
}
+ /** @alias DOM\NodeList */
class DOMNodeList implements IteratorAggregate, Countable
{
/** @readonly */
@@ -523,6 +533,7 @@ public function getIterator(): Iterator {}
public function item(int $index) {}
}
+ /** @alias DOM\CharacterData */
class DOMCharacterData extends DOMNode implements DOMChildNode
{
public string $data;
@@ -573,6 +584,7 @@ public function before(... $nodes): void {}
public function after(...$nodes): void {}
}
+ /** @alias DOM\Attr */
class DOMAttr extends DOMNode
{
/** @readonly */
@@ -595,6 +607,7 @@ public function __construct(string $name, string $value = "") {}
public function isId(): bool {}
}
+ /** @alias DOM\Element */
class DOMElement extends DOMNode implements DOMParentNode, DOMChildNode
{
/** @readonly */
@@ -883,6 +896,7 @@ public function prepend(...$nodes): void {}
public function replaceChildren(...$nodes): void {}
}
+ /** @alias DOM\Exception */
final class DOMException extends Exception
{
/**
@@ -892,6 +906,7 @@ final class DOMException extends Exception
public $code = 0; // TODO add proper type (i.e. int|string)
}
+ /** @alias DOM\Text */
class DOMText extends DOMCharacterData
{
/** @readonly */
@@ -912,6 +927,7 @@ public function isElementContentWhitespace(): bool {}
public function splitText(int $offset) {}
}
+ /** @alias DOM\NamedNodeMap */
class DOMNamedNodeMap implements IteratorAggregate, Countable
{
/** @readonly */
@@ -932,6 +948,7 @@ public function count(): int {}
public function getIterator(): Iterator {}
}
+ /** @alias DOM\Entity */
class DOMEntity extends DOMNode
{
/** @readonly */
@@ -962,11 +979,13 @@ class DOMEntity extends DOMNode
public ?string $version = null;
}
+ /** @alias DOM\EntityReference */
class DOMEntityReference extends DOMNode
{
public function __construct(string $name) {}
}
+ /** @alias DOM\Notation */
class DOMNotation extends DOMNode
{
/** @readonly */
@@ -976,6 +995,7 @@ class DOMNotation extends DOMNode
public string $systemId;
}
+ /** @alias DOM\ProcessingInstruction */
class DOMProcessingInstruction extends DOMNode
{
/** @readonly */
@@ -987,7 +1007,10 @@ public function __construct(string $name, string $value = "") {}
}
#ifdef LIBXML_XPATH_ENABLED
- /** @not-serializable */
+ /**
+ * @not-serializable
+ * @alias DOM\XPath
+ */
class DOMXPath
{
/** @readonly */
diff --git a/ext/dom/php_dom_arginfo.h b/ext/dom/php_dom_arginfo.h
index e182ca3781511..31667facad9c7 100644
--- a/ext/dom/php_dom_arginfo.h
+++ b/ext/dom/php_dom_arginfo.h
@@ -1,5 +1,5 @@
/* This is a generated file, edit the .stub.php file instead.
- * Stub hash: ffe5e718fe6449a3c93c62feb12bc9e3c57e340d */
+ * Stub hash: a099cfd8989a44593d352c5d4ace393f12e10c95 */
ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(arginfo_dom_import_simplexml, 0, 1, DOMElement, 0)
ZEND_ARG_TYPE_INFO(0, node, IS_OBJECT, 0)
@@ -1088,6 +1092,7 @@ static zend_class_entry *register_class_DOMDocumentType(zend_class_entry *class_
INIT_CLASS_ENTRY(ce, "DOMDocumentType", class_DOMDocumentType_methods);
class_entry = zend_register_internal_class_ex(&ce, class_entry_DOMNode);
+ zend_register_class_alias("DOM\\DocumentType", class_entry);
zval property_name_default_value;
ZVAL_UNDEF(&property_name_default_value);
@@ -1136,6 +1141,7 @@ static zend_class_entry *register_class_DOMCdataSection(zend_class_entry *class_
INIT_CLASS_ENTRY(ce, "DOMCdataSection", class_DOMCdataSection_methods);
class_entry = zend_register_internal_class_ex(&ce, class_entry_DOMText);
+ zend_register_class_alias("DOM\\CDATASection", class_entry);
return class_entry;
}
@@ -1146,6 +1152,7 @@ static zend_class_entry *register_class_DOMComment(zend_class_entry *class_entry
INIT_CLASS_ENTRY(ce, "DOMComment", class_DOMComment_methods);
class_entry = zend_register_internal_class_ex(&ce, class_entry_DOMCharacterData);
+ zend_register_class_alias("DOM\\Comment", class_entry);
return class_entry;
}
@@ -1156,6 +1163,7 @@ static zend_class_entry *register_class_DOMParentNode(void)
INIT_CLASS_ENTRY(ce, "DOMParentNode", class_DOMParentNode_methods);
class_entry = zend_register_internal_interface(&ce);
+ zend_register_class_alias("DOM\\ParentNode", class_entry);
return class_entry;
}
@@ -1166,6 +1174,7 @@ static zend_class_entry *register_class_DOMChildNode(void)
INIT_CLASS_ENTRY(ce, "DOMChildNode", class_DOMChildNode_methods);
class_entry = zend_register_internal_interface(&ce);
+ zend_register_class_alias("DOM\\ChildNode", class_entry);
return class_entry;
}
@@ -1176,6 +1185,7 @@ static zend_class_entry *register_class_DOMNode(void)
INIT_CLASS_ENTRY(ce, "DOMNode", class_DOMNode_methods);
class_entry = zend_register_internal_class_ex(&ce, NULL);
+ zend_register_class_alias("DOM\\Node", class_entry);
zval const_DOCUMENT_POSITION_DISCONNECTED_value;
ZVAL_LONG(&const_DOCUMENT_POSITION_DISCONNECTED_value, 0x1);
@@ -1339,6 +1353,7 @@ static zend_class_entry *register_class_DOMNameSpaceNode(void)
INIT_CLASS_ENTRY(ce, "DOMNameSpaceNode", class_DOMNameSpaceNode_methods);
class_entry = zend_register_internal_class_ex(&ce, NULL);
+ zend_register_class_alias("DOM\\NameSpaceNode", class_entry);
zval property_nodeName_default_value;
ZVAL_UNDEF(&property_nodeName_default_value);
@@ -1412,6 +1431,7 @@ static zend_class_entry *register_class_DOMImplementation(void)
INIT_CLASS_ENTRY(ce, "DOMImplementation", class_DOMImplementation_methods);
class_entry = zend_register_internal_class_ex(&ce, NULL);
+ zend_register_class_alias("DOM\\Implementation", class_entry);
return class_entry;
}
@@ -1423,6 +1443,7 @@ static zend_class_entry *register_class_DOMDocumentFragment(zend_class_entry *cl
INIT_CLASS_ENTRY(ce, "DOMDocumentFragment", class_DOMDocumentFragment_methods);
class_entry = zend_register_internal_class_ex(&ce, class_entry_DOMNode);
zend_class_implements(class_entry, 1, class_entry_DOMParentNode);
+ zend_register_class_alias("DOM\\DocumentFragment", class_entry);
zval property_firstElementChild_default_value;
ZVAL_UNDEF(&property_firstElementChild_default_value);
@@ -1454,6 +1475,7 @@ static zend_class_entry *register_class_DOMNodeList(zend_class_entry *class_entr
INIT_CLASS_ENTRY(ce, "DOMNodeList", class_DOMNodeList_methods);
class_entry = zend_register_internal_class_ex(&ce, NULL);
zend_class_implements(class_entry, 2, class_entry_IteratorAggregate, class_entry_Countable);
+ zend_register_class_alias("DOM\\NodeList", class_entry);
zval property_length_default_value;
ZVAL_UNDEF(&property_length_default_value);
@@ -1471,6 +1493,7 @@ static zend_class_entry *register_class_DOMCharacterData(zend_class_entry *class
INIT_CLASS_ENTRY(ce, "DOMCharacterData", class_DOMCharacterData_methods);
class_entry = zend_register_internal_class_ex(&ce, class_entry_DOMNode);
zend_class_implements(class_entry, 1, class_entry_DOMChildNode);
+ zend_register_class_alias("DOM\\CharacterData", class_entry);
zval property_data_default_value;
ZVAL_UNDEF(&property_data_default_value);
@@ -1507,6 +1530,7 @@ static zend_class_entry *register_class_DOMAttr(zend_class_entry *class_entry_DO
INIT_CLASS_ENTRY(ce, "DOMAttr", class_DOMAttr_methods);
class_entry = zend_register_internal_class_ex(&ce, class_entry_DOMNode);
+ zend_register_class_alias("DOM\\Attr", class_entry);
zval property_name_default_value;
ZVAL_UNDEF(&property_name_default_value);
@@ -1549,6 +1573,7 @@ static zend_class_entry *register_class_DOMElement(zend_class_entry *class_entry
INIT_CLASS_ENTRY(ce, "DOMElement", class_DOMElement_methods);
class_entry = zend_register_internal_class_ex(&ce, class_entry_DOMNode);
zend_class_implements(class_entry, 2, class_entry_DOMParentNode, class_entry_DOMChildNode);
+ zend_register_class_alias("DOM\\Element", class_entry);
zval property_tagName_default_value;
ZVAL_UNDEF(&property_tagName_default_value);
@@ -1766,6 +1791,7 @@ static zend_class_entry *register_class_DOMException(zend_class_entry *class_ent
INIT_CLASS_ENTRY(ce, "DOMException", class_DOMException_methods);
class_entry = zend_register_internal_class_ex(&ce, class_entry_Exception);
class_entry->ce_flags |= ZEND_ACC_FINAL;
+ zend_register_class_alias("DOM\\Exception", class_entry);
zval property_code_default_value;
ZVAL_LONG(&property_code_default_value, 0);
@@ -1782,6 +1808,7 @@ static zend_class_entry *register_class_DOMText(zend_class_entry *class_entry_DO
INIT_CLASS_ENTRY(ce, "DOMText", class_DOMText_methods);
class_entry = zend_register_internal_class_ex(&ce, class_entry_DOMCharacterData);
+ zend_register_class_alias("DOM\\Text", class_entry);
zval property_wholeText_default_value;
ZVAL_UNDEF(&property_wholeText_default_value);
@@ -1799,6 +1826,7 @@ static zend_class_entry *register_class_DOMNamedNodeMap(zend_class_entry *class_
INIT_CLASS_ENTRY(ce, "DOMNamedNodeMap", class_DOMNamedNodeMap_methods);
class_entry = zend_register_internal_class_ex(&ce, NULL);
zend_class_implements(class_entry, 2, class_entry_IteratorAggregate, class_entry_Countable);
+ zend_register_class_alias("DOM\\NamedNodeMap", class_entry);
zval property_length_default_value;
ZVAL_UNDEF(&property_length_default_value);
@@ -1815,6 +1843,7 @@ static zend_class_entry *register_class_DOMEntity(zend_class_entry *class_entry_
INIT_CLASS_ENTRY(ce, "DOMEntity", class_DOMEntity_methods);
class_entry = zend_register_internal_class_ex(&ce, class_entry_DOMNode);
+ zend_register_class_alias("DOM\\Entity", class_entry);
zval property_publicId_default_value;
ZVAL_UNDEF(&property_publicId_default_value);
@@ -1861,6 +1890,7 @@ static zend_class_entry *register_class_DOMEntityReference(zend_class_entry *cla
INIT_CLASS_ENTRY(ce, "DOMEntityReference", class_DOMEntityReference_methods);
class_entry = zend_register_internal_class_ex(&ce, class_entry_DOMNode);
+ zend_register_class_alias("DOM\\EntityReference", class_entry);
return class_entry;
}
@@ -1871,6 +1901,7 @@ static zend_class_entry *register_class_DOMNotation(zend_class_entry *class_entr
INIT_CLASS_ENTRY(ce, "DOMNotation", class_DOMNotation_methods);
class_entry = zend_register_internal_class_ex(&ce, class_entry_DOMNode);
+ zend_register_class_alias("DOM\\Notation", class_entry);
zval property_publicId_default_value;
ZVAL_UNDEF(&property_publicId_default_value);
@@ -1893,6 +1924,7 @@ static zend_class_entry *register_class_DOMProcessingInstruction(zend_class_entr
INIT_CLASS_ENTRY(ce, "DOMProcessingInstruction", class_DOMProcessingInstruction_methods);
class_entry = zend_register_internal_class_ex(&ce, class_entry_DOMNode);
+ zend_register_class_alias("DOM\\ProcessingInstruction", class_entry);
zval property_target_default_value;
ZVAL_UNDEF(&property_target_default_value);
@@ -1917,6 +1949,7 @@ static zend_class_entry *register_class_DOMXPath(void)
INIT_CLASS_ENTRY(ce, "DOMXPath", class_DOMXPath_methods);
class_entry = zend_register_internal_class_ex(&ce, NULL);
class_entry->ce_flags |= ZEND_ACC_NOT_SERIALIZABLE;
+ zend_register_class_alias("DOM\\XPath", class_entry);
zval property_document_default_value;
ZVAL_UNDEF(&property_document_default_value);
From 5776832ffbc8bb494d0df137e1b84370e358e48d Mon Sep 17 00:00:00 2001
From: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
Date: Sun, 17 Sep 2023 15:36:42 +0200
Subject: [PATCH 08/53] Introduce common base class
---
ext/dom/document.c | 42 +-
ext/dom/dom_ce.h | 1 +
ext/dom/php_dom.c | 6 +-
ext/dom/php_dom.stub.php | 209 ++++----
ext/dom/php_dom_arginfo.h | 469 +++++++++---------
ext/dom/tests/DOMDocument_adoptNode.phpt | 2 +-
...Document_relaxNGValidateSource_error1.phpt | 2 +-
...Document_relaxNGValidateSource_error2.phpt | 4 +-
.../DOMDocument_relaxNGValidate_error1.phpt | 2 +-
.../DOMDocument_relaxNGValidate_error2.phpt | 6 +-
...MDocument_schemaValidateSource_error1.phpt | 10 +-
...MDocument_schemaValidateSource_error2.phpt | 2 +-
...MDocument_schemaValidateSource_error3.phpt | 2 +-
.../DOMDocument_schemaValidate_error1.phpt | 10 +-
.../DOMDocument_schemaValidate_error2.phpt | 2 +-
.../DOMDocument_schemaValidate_error3.phpt | 2 +-
.../DOMDocument_schemaValidate_error5.phpt | 6 +-
.../DOMDocument_schemaValidate_error6.phpt | 4 +-
...ocument_strictErrorChecking_variation.phpt | 2 +-
.../Document_registerNodeClass_02.phpt | 4 +-
20 files changed, 402 insertions(+), 385 deletions(-)
diff --git a/ext/dom/document.c b/ext/dom/document.c
index cbaa29fb775c4..a31d9d58c052b 100644
--- a/ext/dom/document.c
+++ b/ext/dom/document.c
@@ -483,7 +483,7 @@ zend_result dom_document_config_read(dom_object *obj, zval *retval)
/* {{{ URL: http://www.w3.org/TR/2003/WD-DOM-Level-3-Core-20030226/DOM3-Core.html#core-ID-2141741547
Since:
*/
-PHP_METHOD(DOMDocument, createElement)
+PHP_METHOD(DOM_Document, createElement)
{
zval *id;
xmlNode *node;
@@ -518,7 +518,7 @@ PHP_METHOD(DOMDocument, createElement)
/* {{{ URL: http://www.w3.org/TR/2003/WD-DOM-Level-3-Core-20030226/DOM3-Core.html#core-ID-35CB04B5
Since:
*/
-PHP_METHOD(DOMDocument, createDocumentFragment)
+PHP_METHOD(DOM_Document, createDocumentFragment)
{
zval *id;
xmlNode *node;
@@ -546,7 +546,7 @@ PHP_METHOD(DOMDocument, createDocumentFragment)
/* {{{ URL: http://www.w3.org/TR/2003/WD-DOM-Level-3-Core-20030226/DOM3-Core.html#core-ID-1975348127
Since:
*/
-PHP_METHOD(DOMDocument, createTextNode)
+PHP_METHOD(DOM_Document, createTextNode)
{
zval *id;
xmlNode *node;
@@ -576,7 +576,7 @@ PHP_METHOD(DOMDocument, createTextNode)
/* {{{ URL: http://www.w3.org/TR/2003/WD-DOM-Level-3-Core-20030226/DOM3-Core.html#core-ID-1334481328
Since:
*/
-PHP_METHOD(DOMDocument, createComment)
+PHP_METHOD(DOM_Document, createComment)
{
zval *id;
xmlNode *node;
@@ -606,7 +606,7 @@ PHP_METHOD(DOMDocument, createComment)
/* {{{ URL: http://www.w3.org/TR/2003/WD-DOM-Level-3-Core-20030226/DOM3-Core.html#core-ID-D26C0AF8
Since:
*/
-PHP_METHOD(DOMDocument, createCDATASection)
+PHP_METHOD(DOM_Document, createCDATASection)
{
zval *id;
xmlNode *node;
@@ -636,7 +636,7 @@ PHP_METHOD(DOMDocument, createCDATASection)
/* {{{ URL: http://www.w3.org/TR/2003/WD-DOM-Level-3-Core-20030226/DOM3-Core.html#core-ID-135944439
Since:
*/
-PHP_METHOD(DOMDocument, createProcessingInstruction)
+PHP_METHOD(DOM_Document, createProcessingInstruction)
{
zval *id;
xmlNode *node;
@@ -673,7 +673,7 @@ PHP_METHOD(DOMDocument, createProcessingInstruction)
/* {{{ URL: http://www.w3.org/TR/2003/WD-DOM-Level-3-Core-20030226/DOM3-Core.html#core-ID-1084891198
Since:
*/
-PHP_METHOD(DOMDocument, createAttribute)
+PHP_METHOD(DOM_Document, createAttribute)
{
zval *id;
xmlAttrPtr node;
@@ -744,7 +744,7 @@ PHP_METHOD(DOMDocument, createEntityReference)
/* {{{ URL: http://www.w3.org/TR/2003/WD-DOM-Level-3-Core-20030226/DOM3-Core.html#core-ID-A6C9094
Since:
*/
-PHP_METHOD(DOMDocument, getElementsByTagName)
+PHP_METHOD(DOM_Document, getElementsByTagName)
{
size_t name_len;
dom_object *intern, *namednode;
@@ -765,7 +765,7 @@ PHP_METHOD(DOMDocument, getElementsByTagName)
/* {{{ URL: http://www.w3.org/TR/2003/WD-DOM-Level-3-Core-20030226/DOM3-Core.html#Core-Document-importNode
Since: DOM Level 2
*/
-PHP_METHOD(DOMDocument, importNode)
+PHP_METHOD(DOM_Document, importNode)
{
zval *node;
xmlDocPtr docp;
@@ -818,7 +818,7 @@ PHP_METHOD(DOMDocument, importNode)
/* {{{ URL: http://www.w3.org/TR/2003/WD-DOM-Level-3-Core-20030226/DOM3-Core.html#core-ID-DocCrElNS
Since: DOM Level 2
*/
-PHP_METHOD(DOMDocument, createElementNS)
+PHP_METHOD(DOM_Document, createElementNS)
{
zval *id;
xmlDocPtr docp;
@@ -878,7 +878,7 @@ PHP_METHOD(DOMDocument, createElementNS)
/* {{{ URL: http://www.w3.org/TR/2003/WD-DOM-Level-3-Core-20030226/DOM3-Core.html#core-ID-DocCrAttrNS
Since: DOM Level 2
*/
-PHP_METHOD(DOMDocument, createAttributeNS)
+PHP_METHOD(DOM_Document, createAttributeNS)
{
zval *id;
xmlDocPtr docp;
@@ -944,7 +944,7 @@ PHP_METHOD(DOMDocument, createAttributeNS)
/* {{{ URL: http://www.w3.org/TR/2003/WD-DOM-Level-3-Core-20030226/DOM3-Core.html#core-ID-getElBTNNS
Since: DOM Level 2
*/
-PHP_METHOD(DOMDocument, getElementsByTagNameNS)
+PHP_METHOD(DOM_Document, getElementsByTagNameNS)
{
size_t uri_len, name_len;
dom_object *intern, *namednode;
@@ -965,7 +965,7 @@ PHP_METHOD(DOMDocument, getElementsByTagNameNS)
/* {{{ URL: http://www.w3.org/TR/2003/WD-DOM-Level-3-Core-20030226/DOM3-Core.html#core-ID-getElBId
Since: DOM Level 2
*/
-PHP_METHOD(DOMDocument, getElementById)
+PHP_METHOD(DOM_Document, getElementById)
{
zval *id;
xmlDocPtr docp;
@@ -1042,7 +1042,7 @@ bool php_dom_adopt_node(xmlNodePtr nodep, dom_object *dom_object_new_document, x
Since: DOM Level 3
Modern spec URL: https://dom.spec.whatwg.org/#dom-document-adoptnode
*/
-PHP_METHOD(DOMDocument, adoptNode)
+PHP_METHOD(DOM_Document, adoptNode)
{
zval *node_zval;
if (zend_parse_parameters(ZEND_NUM_ARGS(), "O", &node_zval, dom_node_class_entry) == FAILURE) {
@@ -1079,7 +1079,7 @@ PHP_METHOD(DOMDocument, adoptNode)
/* {{{ URL: http://www.w3.org/TR/2003/WD-DOM-Level-3-Core-20030226/DOM3-Core.html#core-Document3-normalizeDocument
Since: DOM Level 3
*/
-PHP_METHOD(DOMDocument, normalizeDocument)
+PHP_METHOD(DOM_Document, normalizeDocument)
{
zval *id;
xmlDocPtr docp;
@@ -1755,14 +1755,14 @@ static void _dom_document_schema_validate(INTERNAL_FUNCTION_PARAMETERS, int type
/* }}} */
/* {{{ */
-PHP_METHOD(DOMDocument, schemaValidate)
+PHP_METHOD(DOM_Document, schemaValidate)
{
_dom_document_schema_validate(INTERNAL_FUNCTION_PARAM_PASSTHRU, DOM_LOAD_FILE);
}
/* }}} end dom_document_schema_validate_file */
/* {{{ */
-PHP_METHOD(DOMDocument, schemaValidateSource)
+PHP_METHOD(DOM_Document, schemaValidateSource)
{
_dom_document_schema_validate(INTERNAL_FUNCTION_PARAM_PASSTHRU, DOM_LOAD_STRING);
}
@@ -1851,14 +1851,14 @@ static void _dom_document_relaxNG_validate(INTERNAL_FUNCTION_PARAMETERS, int typ
/* }}} */
/* {{{ */
-PHP_METHOD(DOMDocument, relaxNGValidate)
+PHP_METHOD(DOM_Document, relaxNGValidate)
{
_dom_document_relaxNG_validate(INTERNAL_FUNCTION_PARAM_PASSTHRU, DOM_LOAD_FILE);
}
/* }}} end dom_document_relaxNG_validate_file */
/* {{{ */
-PHP_METHOD(DOMDocument, relaxNGValidateSource)
+PHP_METHOD(DOM_Document, relaxNGValidateSource)
{
_dom_document_relaxNG_validate(INTERNAL_FUNCTION_PARAM_PASSTHRU, DOM_LOAD_STRING);
}
@@ -2064,7 +2064,7 @@ PHP_METHOD(DOMDocument, saveHTML)
#endif /* defined(LIBXML_HTML_ENABLED) */
/* {{{ Register extended class used to create base node type */
-PHP_METHOD(DOMDocument, registerNodeClass)
+PHP_METHOD(DOM_Document, registerNodeClass)
{
zend_class_entry *basece = dom_node_class_entry, *ce = NULL;
dom_object *intern;
@@ -2090,7 +2090,7 @@ PHP_METHOD(DOMDocument, registerNodeClass)
/* {{{ URL: https://dom.spec.whatwg.org/#dom-parentnode-replacechildren
Since:
*/
-PHP_METHOD(DOMDocument, replaceChildren)
+PHP_METHOD(DOM_Document, replaceChildren)
{
uint32_t argc = 0;
zval *args;
diff --git a/ext/dom/dom_ce.h b/ext/dom/dom_ce.h
index 399e21d2900ce..a489b059abcad 100644
--- a/ext/dom/dom_ce.h
+++ b/ext/dom/dom_ce.h
@@ -37,6 +37,7 @@ extern PHP_DOM_EXPORT zend_class_entry *dom_notation_class_entry;
extern PHP_DOM_EXPORT zend_class_entry *dom_entity_class_entry;
extern PHP_DOM_EXPORT zend_class_entry *dom_entityreference_class_entry;
extern PHP_DOM_EXPORT zend_class_entry *dom_processinginstruction_class_entry;
+extern PHP_DOM_EXPORT zend_class_entry *dom_abstract_base_document_class_entry;
#ifdef LIBXML_XPATH_ENABLED
extern PHP_DOM_EXPORT zend_class_entry *dom_xpath_class_entry;
#endif
diff --git a/ext/dom/php_dom.c b/ext/dom/php_dom.c
index 2fcdae09709b1..8b302dc0f629d 100644
--- a/ext/dom/php_dom.c
+++ b/ext/dom/php_dom.c
@@ -56,6 +56,7 @@ PHP_DOM_EXPORT zend_class_entry *dom_notation_class_entry;
PHP_DOM_EXPORT zend_class_entry *dom_entity_class_entry;
PHP_DOM_EXPORT zend_class_entry *dom_entityreference_class_entry;
PHP_DOM_EXPORT zend_class_entry *dom_processinginstruction_class_entry;
+PHP_DOM_EXPORT zend_class_entry *dom_abstract_base_document_class_entry;
#ifdef LIBXML_XPATH_ENABLED
PHP_DOM_EXPORT zend_class_entry *dom_xpath_class_entry;
#endif
@@ -696,7 +697,10 @@ PHP_MINIT_FUNCTION(dom)
zend_hash_merge(&dom_documentfragment_prop_handlers, &dom_node_prop_handlers, dom_copy_prop_handler, 0);
zend_hash_add_ptr(&classes, dom_documentfragment_class_entry->name, &dom_documentfragment_prop_handlers);
- dom_document_class_entry = register_class_DOMDocument(dom_node_class_entry, dom_parentnode_class_entry);
+ dom_abstract_base_document_class_entry = register_class_DOM_Document(dom_node_class_entry, dom_parentnode_class_entry);
+ // TODO: prop handlers
+
+ dom_document_class_entry = register_class_DOMDocument(dom_abstract_base_document_class_entry);
dom_document_class_entry->create_object = dom_objects_new;
zend_hash_init(&dom_document_prop_handlers, 0, NULL, dom_dtor_prop_handler, 1);
dom_register_prop_handler(&dom_document_prop_handlers, "doctype", sizeof("doctype")-1, dom_document_doctype_read, NULL);
diff --git a/ext/dom/php_dom.stub.php b/ext/dom/php_dom.stub.php
index 54c702f068bd0..ce7cdb6ce6e6b 100644
--- a/ext/dom/php_dom.stub.php
+++ b/ext/dom/php_dom.stub.php
@@ -513,7 +513,7 @@ public function prepend(...$nodes): void {}
/**
* @param DOMNode|string $nodes
- * @implementation-alias DOMDocument::replaceChildren
+ * @implementation-alias DOM\Document::replaceChildren
*/
public function replaceChildren(...$nodes): void {}
}
@@ -720,25 +720,14 @@ public function insertAdjacentElement(string $where, DOMElement $element): ?DOME
public function insertAdjacentText(string $where, string $data): void {}
}
- class DOMDocument extends DOMNode implements DOMParentNode
+ class DOMDocument extends DOM\Document
{
- /** @readonly */
- public ?DOMDocumentType $doctype;
-
- /** @readonly */
- public DOMImplementation $implementation;
-
- /** @readonly */
- public ?DOMElement $documentElement;
-
/**
* @readonly
* @deprecated
*/
public ?string $actualEncoding;
- public ?string $encoding;
-
/** @readonly */
public ?string $xmlEncoding;
@@ -750,10 +739,6 @@ class DOMDocument extends DOMNode implements DOMParentNode
public ?string $xmlVersion;
- public bool $strictErrorChecking;
-
- public ?string $documentURI;
-
/**
* @readonly
* @deprecated
@@ -772,71 +757,17 @@ class DOMDocument extends DOMNode implements DOMParentNode
public bool $substituteEntities;
- /** @readonly */
- public ?DOMElement $firstElementChild;
-
- /** @readonly */
- public ?DOMElement $lastElementChild;
-
- /** @readonly */
- public int $childElementCount;
-
public function __construct(string $version = "1.0", string $encoding = "") {}
- /** @return DOMAttr|false */
- public function createAttribute(string $localName) {}
-
- /** @return DOMAttr|false */
- public function createAttributeNS(?string $namespace, string $qualifiedName) {}
-
- /** @return DOMCdataSection|false */
- public function createCDATASection(string $data) {}
-
- /** @tentative-return-type */
- public function createComment(string $data): DOMComment {}
-
- /** @tentative-return-type */
- public function createDocumentFragment(): DOMDocumentFragment {}
-
- /** @return DOMElement|false */
- public function createElement(string $localName, string $value = "") {}
-
- /** @return DOMElement|false */
- public function createElementNS(?string $namespace, string $qualifiedName, string $value = "") {}
-
/** @return DOMEntityReference|false */
public function createEntityReference(string $name) {}
- /** @return DOMProcessingInstruction|false */
- public function createProcessingInstruction(string $target, string $data = "") {}
-
- /** @tentative-return-type */
- public function createTextNode(string $data): DOMText {}
-
- /** @tentative-return-type */
- public function getElementById(string $elementId): ?DOMElement {}
-
- /** @tentative-return-type */
- public function getElementsByTagName(string $qualifiedName): DOMNodeList {}
-
- /** @tentative-return-type */
- public function getElementsByTagNameNS(?string $namespace, string $localName): DOMNodeList {}
-
- /** @return DOMNode|false */
- public function importNode(DOMNode $node, bool $deep = false) {}
-
/** @tentative-return-type */
public function load(string $filename, int $options = 0): bool {}
/** @tentative-return-type */
public function loadXML(string $source, int $options = 0): bool {}
- /** @tentative-return-type */
- public function normalizeDocument(): void {}
-
- /** @tentative-return-type */
- public function registerNodeClass(string $baseClass, ?string $extendedClass): bool {}
-
/** @tentative-return-type */
public function save(string $filename, int $options = 0): int|false {}
@@ -857,43 +788,11 @@ public function saveHTMLFile(string $filename): int|false {}
/** @tentative-return-type */
public function saveXML(?DOMNode $node = null, int $options = 0): string|false {}
- #ifdef LIBXML_SCHEMAS_ENABLED
- /** @tentative-return-type */
- public function schemaValidate(string $filename, int $flags = 0): bool {}
-
- /** @tentative-return-type */
- public function schemaValidateSource(string $source, int $flags = 0): bool {}
-
- /** @tentative-return-type */
- public function relaxNGValidate(string $filename): bool {}
-
- /** @tentative-return-type */
- public function relaxNGValidateSource(string $source): bool {}
- #endif
-
/** @tentative-return-type */
public function validate(): bool {}
/** @tentative-return-type */
public function xinclude(int $options = 0): int|false {}
-
- /** @tentative-return-type */
- public function adoptNode(DOMNode $node): DOMNode|false {}
-
- /**
- * @param DOMNode|string $nodes
- * @implementation-alias DOMElement::append
- */
- public function append(...$nodes): void {}
-
- /**
- * @param DOMNode|string $nodes
- * @implementation-alias DOMElement::prepend
- */
- public function prepend(...$nodes): void {}
-
- /** @param DOMNode|string $nodes */
- public function replaceChildren(...$nodes): void {}
}
/** @alias DOM\Exception */
@@ -1131,6 +1030,110 @@ function dom_import_simplexml(object $node): DOMElement {}
*/
const HTML_NO_DEFAULT_NS = UNKNOWN;
+ abstract class Document extends DOMNode implements DOMParentNode
+ {
+ /** @readonly */
+ public ?DocumentType $doctype;
+
+ /** @readonly */
+ public Implementation $implementation;
+
+ /** @readonly */
+ public ?Element $documentElement;
+
+ public ?string $encoding;
+
+ public bool $strictErrorChecking;
+
+ public ?string $documentURI;
+
+ /** @readonly */
+ public ?Element $firstElementChild;
+
+ /** @readonly */
+ public ?Element $lastElementChild;
+
+ /** @readonly */
+ public int $childElementCount;
+
+ /** @return Attr|false */
+ public function createAttribute(string $localName) {}
+
+ /** @return Attr|false */
+ public function createAttributeNS(?string $namespace, string $qualifiedName) {}
+
+ /** @return CDataSection|false */
+ public function createCDATASection(string $data) {}
+
+ /** @tentative-return-type */
+ public function createComment(string $data): Comment {}
+
+ /** @tentative-return-type */
+ public function createDocumentFragment(): DocumentFragment {}
+
+ /** @return Element|false */
+ public function createElement(string $localName, string $value = "") {}
+
+ /** @return Element|false */
+ public function createElementNS(?string $namespace, string $qualifiedName, string $value = "") {}
+
+ /** @return ProcessingInstruction|false */
+ public function createProcessingInstruction(string $target, string $data = "") {}
+
+ /** @tentative-return-type */
+ public function createTextNode(string $data): Text {}
+
+ /** @tentative-return-type */
+ public function getElementById(string $elementId): ?Element {}
+
+ /** @tentative-return-type */
+ public function getElementsByTagName(string $qualifiedName): NodeList {}
+
+ /** @tentative-return-type */
+ public function getElementsByTagNameNS(?string $namespace, string $localName): NodeList {}
+
+ /** @return Node|false */
+ public function importNode(Node $node, bool $deep = false) {}
+
+ /** @tentative-return-type */
+ public function normalizeDocument(): void {}
+
+ /** @tentative-return-type */
+ public function registerNodeClass(string $baseClass, ?string $extendedClass): bool {}
+
+ #ifdef LIBXML_SCHEMAS_ENABLED
+ /** @tentative-return-type */
+ public function schemaValidate(string $filename, int $flags = 0): bool {}
+
+ /** @tentative-return-type */
+ public function schemaValidateSource(string $source, int $flags = 0): bool {}
+
+ /** @tentative-return-type */
+ public function relaxNGValidate(string $filename): bool {}
+
+ /** @tentative-return-type */
+ public function relaxNGValidateSource(string $source): bool {}
+ #endif
+
+ /** @tentative-return-type */
+ public function adoptNode(Node $node): Node|false {}
+
+ /**
+ * @param Node|string $nodes
+ * @implementation-alias DOMElement::append
+ */
+ public function append(...$nodes): void {}
+
+ /**
+ * @param Node|string $nodes
+ * @implementation-alias DOMElement::prepend
+ */
+ public function prepend(...$nodes): void {}
+
+ /** @param Node|string $nodes */
+ public function replaceChildren(...$nodes): void {}
+ }
+
class HTML5Document extends \DOMDocument
{
public function __construct(string $xmlVersion = "1.0", string $encoding = "") {}
diff --git a/ext/dom/php_dom_arginfo.h b/ext/dom/php_dom_arginfo.h
index 31667facad9c7..be7d13724ea31 100644
--- a/ext/dom/php_dom_arginfo.h
+++ b/ext/dom/php_dom_arginfo.h
@@ -1,9 +1,5 @@
/* This is a generated file, edit the .stub.php file instead.
-<<<<<<< HEAD
- * Stub hash: ffe5e718fe6449a3c93c62feb12bc9e3c57e340d */
-=======
- * Stub hash: a099cfd8989a44593d352c5d4ace393f12e10c95 */
->>>>>>> 6cdbadbf86 (Create class aliases)
+ * Stub hash: 2ee03c3ce38c5f9e89cc16015df7f8455713f879 */
ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(arginfo_dom_import_simplexml, 0, 1, DOMElement, 0)
ZEND_ARG_TYPE_INFO(0, node, IS_OBJECT, 0)
@@ -336,61 +332,10 @@ ZEND_BEGIN_ARG_INFO_EX(arginfo_class_DOMDocument___construct, 0, 0, 0)
ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, encoding, IS_STRING, 0, "\"\"")
ZEND_END_ARG_INFO()
-ZEND_BEGIN_ARG_INFO_EX(arginfo_class_DOMDocument_createAttribute, 0, 0, 1)
- ZEND_ARG_TYPE_INFO(0, localName, IS_STRING, 0)
-ZEND_END_ARG_INFO()
-
-ZEND_BEGIN_ARG_INFO_EX(arginfo_class_DOMDocument_createAttributeNS, 0, 0, 2)
- ZEND_ARG_TYPE_INFO(0, namespace, IS_STRING, 1)
- ZEND_ARG_TYPE_INFO(0, qualifiedName, IS_STRING, 0)
-ZEND_END_ARG_INFO()
-
-#define arginfo_class_DOMDocument_createCDATASection arginfo_class_DOMCdataSection___construct
-
-ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_OBJ_INFO_EX(arginfo_class_DOMDocument_createComment, 0, 1, DOMComment, 0)
- ZEND_ARG_TYPE_INFO(0, data, IS_STRING, 0)
-ZEND_END_ARG_INFO()
-
-ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_OBJ_INFO_EX(arginfo_class_DOMDocument_createDocumentFragment, 0, 0, DOMDocumentFragment, 0)
-ZEND_END_ARG_INFO()
-
-ZEND_BEGIN_ARG_INFO_EX(arginfo_class_DOMDocument_createElement, 0, 0, 1)
- ZEND_ARG_TYPE_INFO(0, localName, IS_STRING, 0)
- ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, value, IS_STRING, 0, "\"\"")
-ZEND_END_ARG_INFO()
-
-ZEND_BEGIN_ARG_INFO_EX(arginfo_class_DOMDocument_createElementNS, 0, 0, 2)
- ZEND_ARG_TYPE_INFO(0, namespace, IS_STRING, 1)
- ZEND_ARG_TYPE_INFO(0, qualifiedName, IS_STRING, 0)
- ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, value, IS_STRING, 0, "\"\"")
-ZEND_END_ARG_INFO()
-
ZEND_BEGIN_ARG_INFO_EX(arginfo_class_DOMDocument_createEntityReference, 0, 0, 1)
ZEND_ARG_TYPE_INFO(0, name, IS_STRING, 0)
ZEND_END_ARG_INFO()
-ZEND_BEGIN_ARG_INFO_EX(arginfo_class_DOMDocument_createProcessingInstruction, 0, 0, 1)
- ZEND_ARG_TYPE_INFO(0, target, IS_STRING, 0)
- ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, data, IS_STRING, 0, "\"\"")
-ZEND_END_ARG_INFO()
-
-ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_OBJ_INFO_EX(arginfo_class_DOMDocument_createTextNode, 0, 1, DOMText, 0)
- ZEND_ARG_TYPE_INFO(0, data, IS_STRING, 0)
-ZEND_END_ARG_INFO()
-
-ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_OBJ_INFO_EX(arginfo_class_DOMDocument_getElementById, 0, 1, DOMElement, 1)
- ZEND_ARG_TYPE_INFO(0, elementId, IS_STRING, 0)
-ZEND_END_ARG_INFO()
-
-#define arginfo_class_DOMDocument_getElementsByTagName arginfo_class_DOMElement_getElementsByTagName
-
-#define arginfo_class_DOMDocument_getElementsByTagNameNS arginfo_class_DOMElement_getElementsByTagNameNS
-
-ZEND_BEGIN_ARG_INFO_EX(arginfo_class_DOMDocument_importNode, 0, 0, 1)
- ZEND_ARG_OBJ_INFO(0, node, DOMNode, 0)
- ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, deep, _IS_BOOL, 0, "false")
-ZEND_END_ARG_INFO()
-
ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_TYPE_INFO_EX(arginfo_class_DOMDocument_load, 0, 1, _IS_BOOL, 0)
ZEND_ARG_TYPE_INFO(0, filename, IS_STRING, 0)
ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, options, IS_LONG, 0, "0")
@@ -401,13 +346,6 @@ ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_TYPE_INFO_EX(arginfo_class_DOMDocument_load
ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, options, IS_LONG, 0, "0")
ZEND_END_ARG_INFO()
-#define arginfo_class_DOMDocument_normalizeDocument arginfo_class_DOMNode_normalize
-
-ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_TYPE_INFO_EX(arginfo_class_DOMDocument_registerNodeClass, 0, 2, _IS_BOOL, 0)
- ZEND_ARG_TYPE_INFO(0, baseClass, IS_STRING, 0)
- ZEND_ARG_TYPE_INFO(0, extendedClass, IS_STRING, 1)
-ZEND_END_ARG_INFO()
-
ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_TYPE_MASK_EX(arginfo_class_DOMDocument_save, 0, 1, MAY_BE_LONG|MAY_BE_FALSE)
ZEND_ARG_TYPE_INFO(0, filename, IS_STRING, 0)
ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, options, IS_LONG, 0, "0")
@@ -444,48 +382,12 @@ ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_TYPE_MASK_EX(arginfo_class_DOMDocument_save
ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, options, IS_LONG, 0, "0")
ZEND_END_ARG_INFO()
-#if defined(LIBXML_SCHEMAS_ENABLED)
-ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_TYPE_INFO_EX(arginfo_class_DOMDocument_schemaValidate, 0, 1, _IS_BOOL, 0)
- ZEND_ARG_TYPE_INFO(0, filename, IS_STRING, 0)
- ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, flags, IS_LONG, 0, "0")
-ZEND_END_ARG_INFO()
-#endif
-
-#if defined(LIBXML_SCHEMAS_ENABLED)
-ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_TYPE_INFO_EX(arginfo_class_DOMDocument_schemaValidateSource, 0, 1, _IS_BOOL, 0)
- ZEND_ARG_TYPE_INFO(0, source, IS_STRING, 0)
- ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, flags, IS_LONG, 0, "0")
-ZEND_END_ARG_INFO()
-#endif
-
-#if defined(LIBXML_SCHEMAS_ENABLED)
-ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_TYPE_INFO_EX(arginfo_class_DOMDocument_relaxNGValidate, 0, 1, _IS_BOOL, 0)
- ZEND_ARG_TYPE_INFO(0, filename, IS_STRING, 0)
-ZEND_END_ARG_INFO()
-#endif
-
-#if defined(LIBXML_SCHEMAS_ENABLED)
-ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_TYPE_INFO_EX(arginfo_class_DOMDocument_relaxNGValidateSource, 0, 1, _IS_BOOL, 0)
- ZEND_ARG_TYPE_INFO(0, source, IS_STRING, 0)
-ZEND_END_ARG_INFO()
-#endif
-
#define arginfo_class_DOMDocument_validate arginfo_class_DOMNode_hasAttributes
ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_TYPE_MASK_EX(arginfo_class_DOMDocument_xinclude, 0, 0, MAY_BE_LONG|MAY_BE_FALSE)
ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, options, IS_LONG, 0, "0")
ZEND_END_ARG_INFO()
-ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_OBJ_TYPE_MASK_EX(arginfo_class_DOMDocument_adoptNode, 0, 1, DOMNode, MAY_BE_FALSE)
- ZEND_ARG_OBJ_INFO(0, node, DOMNode, 0)
-ZEND_END_ARG_INFO()
-
-#define arginfo_class_DOMDocument_append arginfo_class_DOMParentNode_append
-
-#define arginfo_class_DOMDocument_prepend arginfo_class_DOMParentNode_append
-
-#define arginfo_class_DOMDocument_replaceChildren arginfo_class_DOMParentNode_append
-
#define arginfo_class_DOMText___construct arginfo_class_DOMComment___construct
#define arginfo_class_DOMText_isWhitespaceInElementContent arginfo_class_DOMNode_hasAttributes
@@ -549,6 +451,105 @@ ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_TYPE_INFO_EX(arginfo_class_DOMXPath_registe
ZEND_END_ARG_INFO()
#endif
+ZEND_BEGIN_ARG_INFO_EX(arginfo_class_DOM_Document_createAttribute, 0, 0, 1)
+ ZEND_ARG_TYPE_INFO(0, localName, IS_STRING, 0)
+ZEND_END_ARG_INFO()
+
+ZEND_BEGIN_ARG_INFO_EX(arginfo_class_DOM_Document_createAttributeNS, 0, 0, 2)
+ ZEND_ARG_TYPE_INFO(0, namespace, IS_STRING, 1)
+ ZEND_ARG_TYPE_INFO(0, qualifiedName, IS_STRING, 0)
+ZEND_END_ARG_INFO()
+
+#define arginfo_class_DOM_Document_createCDATASection arginfo_class_DOMCdataSection___construct
+
+ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_OBJ_INFO_EX(arginfo_class_DOM_Document_createComment, 0, 1, DOM\\Comment, 0)
+ ZEND_ARG_TYPE_INFO(0, data, IS_STRING, 0)
+ZEND_END_ARG_INFO()
+
+ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_OBJ_INFO_EX(arginfo_class_DOM_Document_createDocumentFragment, 0, 0, DOM\\DocumentFragment, 0)
+ZEND_END_ARG_INFO()
+
+ZEND_BEGIN_ARG_INFO_EX(arginfo_class_DOM_Document_createElement, 0, 0, 1)
+ ZEND_ARG_TYPE_INFO(0, localName, IS_STRING, 0)
+ ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, value, IS_STRING, 0, "\"\"")
+ZEND_END_ARG_INFO()
+
+ZEND_BEGIN_ARG_INFO_EX(arginfo_class_DOM_Document_createElementNS, 0, 0, 2)
+ ZEND_ARG_TYPE_INFO(0, namespace, IS_STRING, 1)
+ ZEND_ARG_TYPE_INFO(0, qualifiedName, IS_STRING, 0)
+ ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, value, IS_STRING, 0, "\"\"")
+ZEND_END_ARG_INFO()
+
+ZEND_BEGIN_ARG_INFO_EX(arginfo_class_DOM_Document_createProcessingInstruction, 0, 0, 1)
+ ZEND_ARG_TYPE_INFO(0, target, IS_STRING, 0)
+ ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, data, IS_STRING, 0, "\"\"")
+ZEND_END_ARG_INFO()
+
+ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_OBJ_INFO_EX(arginfo_class_DOM_Document_createTextNode, 0, 1, DOM\\Text, 0)
+ ZEND_ARG_TYPE_INFO(0, data, IS_STRING, 0)
+ZEND_END_ARG_INFO()
+
+ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_OBJ_INFO_EX(arginfo_class_DOM_Document_getElementById, 0, 1, DOM\\Element, 1)
+ ZEND_ARG_TYPE_INFO(0, elementId, IS_STRING, 0)
+ZEND_END_ARG_INFO()
+
+ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_OBJ_INFO_EX(arginfo_class_DOM_Document_getElementsByTagName, 0, 1, DOM\\NodeList, 0)
+ ZEND_ARG_TYPE_INFO(0, qualifiedName, IS_STRING, 0)
+ZEND_END_ARG_INFO()
+
+ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_OBJ_INFO_EX(arginfo_class_DOM_Document_getElementsByTagNameNS, 0, 2, DOM\\NodeList, 0)
+ ZEND_ARG_TYPE_INFO(0, namespace, IS_STRING, 1)
+ ZEND_ARG_TYPE_INFO(0, localName, IS_STRING, 0)
+ZEND_END_ARG_INFO()
+
+ZEND_BEGIN_ARG_INFO_EX(arginfo_class_DOM_Document_importNode, 0, 0, 1)
+ ZEND_ARG_OBJ_INFO(0, node, DOM\\Node, 0)
+ ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, deep, _IS_BOOL, 0, "false")
+ZEND_END_ARG_INFO()
+
+#define arginfo_class_DOM_Document_normalizeDocument arginfo_class_DOMNode_normalize
+
+ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_TYPE_INFO_EX(arginfo_class_DOM_Document_registerNodeClass, 0, 2, _IS_BOOL, 0)
+ ZEND_ARG_TYPE_INFO(0, baseClass, IS_STRING, 0)
+ ZEND_ARG_TYPE_INFO(0, extendedClass, IS_STRING, 1)
+ZEND_END_ARG_INFO()
+
+#if defined(LIBXML_SCHEMAS_ENABLED)
+ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_TYPE_INFO_EX(arginfo_class_DOM_Document_schemaValidate, 0, 1, _IS_BOOL, 0)
+ ZEND_ARG_TYPE_INFO(0, filename, IS_STRING, 0)
+ ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, flags, IS_LONG, 0, "0")
+ZEND_END_ARG_INFO()
+#endif
+
+#if defined(LIBXML_SCHEMAS_ENABLED)
+ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_TYPE_INFO_EX(arginfo_class_DOM_Document_schemaValidateSource, 0, 1, _IS_BOOL, 0)
+ ZEND_ARG_TYPE_INFO(0, source, IS_STRING, 0)
+ ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, flags, IS_LONG, 0, "0")
+ZEND_END_ARG_INFO()
+#endif
+
+#if defined(LIBXML_SCHEMAS_ENABLED)
+ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_TYPE_INFO_EX(arginfo_class_DOM_Document_relaxNGValidate, 0, 1, _IS_BOOL, 0)
+ ZEND_ARG_TYPE_INFO(0, filename, IS_STRING, 0)
+ZEND_END_ARG_INFO()
+#endif
+
+#if defined(LIBXML_SCHEMAS_ENABLED)
+ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_TYPE_INFO_EX(arginfo_class_DOM_Document_relaxNGValidateSource, 0, 1, _IS_BOOL, 0)
+ ZEND_ARG_TYPE_INFO(0, source, IS_STRING, 0)
+ZEND_END_ARG_INFO()
+#endif
+
+ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_OBJ_TYPE_MASK_EX(arginfo_class_DOM_Document_adoptNode, 0, 1, DOM\\Node, MAY_BE_FALSE)
+ ZEND_ARG_OBJ_INFO(0, node, DOM\\Node, 0)
+ZEND_END_ARG_INFO()
+
+#define arginfo_class_DOM_Document_append arginfo_class_DOMParentNode_append
+
+#define arginfo_class_DOM_Document_prepend arginfo_class_DOMParentNode_append
+
+#define arginfo_class_DOM_Document_replaceChildren arginfo_class_DOMParentNode_append
+
ZEND_BEGIN_ARG_INFO_EX(arginfo_class_DOM_HTML5Document___construct, 0, 0, 0)
ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, xmlVersion, IS_STRING, 0, "\"1.0\"")
ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, encoding, IS_STRING, 0, "\"\"")
@@ -611,7 +612,7 @@ ZEND_METHOD(DOMDocumentFragment, __construct);
ZEND_METHOD(DOMDocumentFragment, appendXML);
ZEND_METHOD(DOMElement, append);
ZEND_METHOD(DOMElement, prepend);
-ZEND_METHOD(DOMDocument, replaceChildren);
+ZEND_METHOD(DOM_Document, replaceChildren);
ZEND_METHOD(DOMNodeList, count);
ZEND_METHOD(DOMNodeList, getIterator);
ZEND_METHOD(DOMNodeList, item);
@@ -651,24 +652,9 @@ ZEND_METHOD(DOMElement, replaceChildren);
ZEND_METHOD(DOMElement, insertAdjacentElement);
ZEND_METHOD(DOMElement, insertAdjacentText);
ZEND_METHOD(DOMDocument, __construct);
-ZEND_METHOD(DOMDocument, createAttribute);
-ZEND_METHOD(DOMDocument, createAttributeNS);
-ZEND_METHOD(DOMDocument, createCDATASection);
-ZEND_METHOD(DOMDocument, createComment);
-ZEND_METHOD(DOMDocument, createDocumentFragment);
-ZEND_METHOD(DOMDocument, createElement);
-ZEND_METHOD(DOMDocument, createElementNS);
ZEND_METHOD(DOMDocument, createEntityReference);
-ZEND_METHOD(DOMDocument, createProcessingInstruction);
-ZEND_METHOD(DOMDocument, createTextNode);
-ZEND_METHOD(DOMDocument, getElementById);
-ZEND_METHOD(DOMDocument, getElementsByTagName);
-ZEND_METHOD(DOMDocument, getElementsByTagNameNS);
-ZEND_METHOD(DOMDocument, importNode);
ZEND_METHOD(DOMDocument, load);
ZEND_METHOD(DOMDocument, loadXML);
-ZEND_METHOD(DOMDocument, normalizeDocument);
-ZEND_METHOD(DOMDocument, registerNodeClass);
ZEND_METHOD(DOMDocument, save);
#if defined(LIBXML_HTML_ENABLED)
ZEND_METHOD(DOMDocument, loadHTML);
@@ -683,21 +669,8 @@ ZEND_METHOD(DOMDocument, saveHTML);
ZEND_METHOD(DOMDocument, saveHTMLFile);
#endif
ZEND_METHOD(DOMDocument, saveXML);
-#if defined(LIBXML_SCHEMAS_ENABLED)
-ZEND_METHOD(DOMDocument, schemaValidate);
-#endif
-#if defined(LIBXML_SCHEMAS_ENABLED)
-ZEND_METHOD(DOMDocument, schemaValidateSource);
-#endif
-#if defined(LIBXML_SCHEMAS_ENABLED)
-ZEND_METHOD(DOMDocument, relaxNGValidate);
-#endif
-#if defined(LIBXML_SCHEMAS_ENABLED)
-ZEND_METHOD(DOMDocument, relaxNGValidateSource);
-#endif
ZEND_METHOD(DOMDocument, validate);
ZEND_METHOD(DOMDocument, xinclude);
-ZEND_METHOD(DOMDocument, adoptNode);
ZEND_METHOD(DOMText, __construct);
ZEND_METHOD(DOMText, isWhitespaceInElementContent);
ZEND_METHOD(DOMText, splitText);
@@ -723,6 +696,34 @@ ZEND_METHOD(DOMXPath, registerNamespace);
#if defined(LIBXML_XPATH_ENABLED)
ZEND_METHOD(DOMXPath, registerPhpFunctions);
#endif
+ZEND_METHOD(DOM_Document, createAttribute);
+ZEND_METHOD(DOM_Document, createAttributeNS);
+ZEND_METHOD(DOM_Document, createCDATASection);
+ZEND_METHOD(DOM_Document, createComment);
+ZEND_METHOD(DOM_Document, createDocumentFragment);
+ZEND_METHOD(DOM_Document, createElement);
+ZEND_METHOD(DOM_Document, createElementNS);
+ZEND_METHOD(DOM_Document, createProcessingInstruction);
+ZEND_METHOD(DOM_Document, createTextNode);
+ZEND_METHOD(DOM_Document, getElementById);
+ZEND_METHOD(DOM_Document, getElementsByTagName);
+ZEND_METHOD(DOM_Document, getElementsByTagNameNS);
+ZEND_METHOD(DOM_Document, importNode);
+ZEND_METHOD(DOM_Document, normalizeDocument);
+ZEND_METHOD(DOM_Document, registerNodeClass);
+#if defined(LIBXML_SCHEMAS_ENABLED)
+ZEND_METHOD(DOM_Document, schemaValidate);
+#endif
+#if defined(LIBXML_SCHEMAS_ENABLED)
+ZEND_METHOD(DOM_Document, schemaValidateSource);
+#endif
+#if defined(LIBXML_SCHEMAS_ENABLED)
+ZEND_METHOD(DOM_Document, relaxNGValidate);
+#endif
+#if defined(LIBXML_SCHEMAS_ENABLED)
+ZEND_METHOD(DOM_Document, relaxNGValidateSource);
+#endif
+ZEND_METHOD(DOM_Document, adoptNode);
ZEND_METHOD(DOM_HTML5Document, __construct);
ZEND_METHOD(DOM_HTML5Document, load);
ZEND_METHOD(DOM_HTML5Document, loadXML);
@@ -822,7 +823,7 @@ static const zend_function_entry class_DOMDocumentFragment_methods[] = {
ZEND_ME(DOMDocumentFragment, appendXML, arginfo_class_DOMDocumentFragment_appendXML, ZEND_ACC_PUBLIC)
ZEND_MALIAS(DOMElement, append, append, arginfo_class_DOMDocumentFragment_append, ZEND_ACC_PUBLIC)
ZEND_MALIAS(DOMElement, prepend, prepend, arginfo_class_DOMDocumentFragment_prepend, ZEND_ACC_PUBLIC)
- ZEND_MALIAS(DOMDocument, replaceChildren, replaceChildren, arginfo_class_DOMDocumentFragment_replaceChildren, ZEND_ACC_PUBLIC)
+ ZEND_MALIAS(DOM_Document, replaceChildren, replaceChildren, arginfo_class_DOMDocumentFragment_replaceChildren, ZEND_ACC_PUBLIC)
ZEND_FE_END
};
@@ -893,24 +894,9 @@ static const zend_function_entry class_DOMElement_methods[] = {
static const zend_function_entry class_DOMDocument_methods[] = {
ZEND_ME(DOMDocument, __construct, arginfo_class_DOMDocument___construct, ZEND_ACC_PUBLIC)
- ZEND_ME(DOMDocument, createAttribute, arginfo_class_DOMDocument_createAttribute, ZEND_ACC_PUBLIC)
- ZEND_ME(DOMDocument, createAttributeNS, arginfo_class_DOMDocument_createAttributeNS, ZEND_ACC_PUBLIC)
- ZEND_ME(DOMDocument, createCDATASection, arginfo_class_DOMDocument_createCDATASection, ZEND_ACC_PUBLIC)
- ZEND_ME(DOMDocument, createComment, arginfo_class_DOMDocument_createComment, ZEND_ACC_PUBLIC)
- ZEND_ME(DOMDocument, createDocumentFragment, arginfo_class_DOMDocument_createDocumentFragment, ZEND_ACC_PUBLIC)
- ZEND_ME(DOMDocument, createElement, arginfo_class_DOMDocument_createElement, ZEND_ACC_PUBLIC)
- ZEND_ME(DOMDocument, createElementNS, arginfo_class_DOMDocument_createElementNS, ZEND_ACC_PUBLIC)
ZEND_ME(DOMDocument, createEntityReference, arginfo_class_DOMDocument_createEntityReference, ZEND_ACC_PUBLIC)
- ZEND_ME(DOMDocument, createProcessingInstruction, arginfo_class_DOMDocument_createProcessingInstruction, ZEND_ACC_PUBLIC)
- ZEND_ME(DOMDocument, createTextNode, arginfo_class_DOMDocument_createTextNode, ZEND_ACC_PUBLIC)
- ZEND_ME(DOMDocument, getElementById, arginfo_class_DOMDocument_getElementById, ZEND_ACC_PUBLIC)
- ZEND_ME(DOMDocument, getElementsByTagName, arginfo_class_DOMDocument_getElementsByTagName, ZEND_ACC_PUBLIC)
- ZEND_ME(DOMDocument, getElementsByTagNameNS, arginfo_class_DOMDocument_getElementsByTagNameNS, ZEND_ACC_PUBLIC)
- ZEND_ME(DOMDocument, importNode, arginfo_class_DOMDocument_importNode, ZEND_ACC_PUBLIC)
ZEND_ME(DOMDocument, load, arginfo_class_DOMDocument_load, ZEND_ACC_PUBLIC)
ZEND_ME(DOMDocument, loadXML, arginfo_class_DOMDocument_loadXML, ZEND_ACC_PUBLIC)
- ZEND_ME(DOMDocument, normalizeDocument, arginfo_class_DOMDocument_normalizeDocument, ZEND_ACC_PUBLIC)
- ZEND_ME(DOMDocument, registerNodeClass, arginfo_class_DOMDocument_registerNodeClass, ZEND_ACC_PUBLIC)
ZEND_ME(DOMDocument, save, arginfo_class_DOMDocument_save, ZEND_ACC_PUBLIC)
#if defined(LIBXML_HTML_ENABLED)
ZEND_ME(DOMDocument, loadHTML, arginfo_class_DOMDocument_loadHTML, ZEND_ACC_PUBLIC)
@@ -925,24 +911,8 @@ static const zend_function_entry class_DOMDocument_methods[] = {
ZEND_ME(DOMDocument, saveHTMLFile, arginfo_class_DOMDocument_saveHTMLFile, ZEND_ACC_PUBLIC)
#endif
ZEND_ME(DOMDocument, saveXML, arginfo_class_DOMDocument_saveXML, ZEND_ACC_PUBLIC)
-#if defined(LIBXML_SCHEMAS_ENABLED)
- ZEND_ME(DOMDocument, schemaValidate, arginfo_class_DOMDocument_schemaValidate, ZEND_ACC_PUBLIC)
-#endif
-#if defined(LIBXML_SCHEMAS_ENABLED)
- ZEND_ME(DOMDocument, schemaValidateSource, arginfo_class_DOMDocument_schemaValidateSource, ZEND_ACC_PUBLIC)
-#endif
-#if defined(LIBXML_SCHEMAS_ENABLED)
- ZEND_ME(DOMDocument, relaxNGValidate, arginfo_class_DOMDocument_relaxNGValidate, ZEND_ACC_PUBLIC)
-#endif
-#if defined(LIBXML_SCHEMAS_ENABLED)
- ZEND_ME(DOMDocument, relaxNGValidateSource, arginfo_class_DOMDocument_relaxNGValidateSource, ZEND_ACC_PUBLIC)
-#endif
ZEND_ME(DOMDocument, validate, arginfo_class_DOMDocument_validate, ZEND_ACC_PUBLIC)
ZEND_ME(DOMDocument, xinclude, arginfo_class_DOMDocument_xinclude, ZEND_ACC_PUBLIC)
- ZEND_ME(DOMDocument, adoptNode, arginfo_class_DOMDocument_adoptNode, ZEND_ACC_PUBLIC)
- ZEND_MALIAS(DOMElement, append, append, arginfo_class_DOMDocument_append, ZEND_ACC_PUBLIC)
- ZEND_MALIAS(DOMElement, prepend, prepend, arginfo_class_DOMDocument_prepend, ZEND_ACC_PUBLIC)
- ZEND_ME(DOMDocument, replaceChildren, arginfo_class_DOMDocument_replaceChildren, ZEND_ACC_PUBLIC)
ZEND_FE_END
};
@@ -1005,6 +975,42 @@ static const zend_function_entry class_DOMXPath_methods[] = {
#endif
+static const zend_function_entry class_DOM_Document_methods[] = {
+ ZEND_ME(DOM_Document, createAttribute, arginfo_class_DOM_Document_createAttribute, ZEND_ACC_PUBLIC)
+ ZEND_ME(DOM_Document, createAttributeNS, arginfo_class_DOM_Document_createAttributeNS, ZEND_ACC_PUBLIC)
+ ZEND_ME(DOM_Document, createCDATASection, arginfo_class_DOM_Document_createCDATASection, ZEND_ACC_PUBLIC)
+ ZEND_ME(DOM_Document, createComment, arginfo_class_DOM_Document_createComment, ZEND_ACC_PUBLIC)
+ ZEND_ME(DOM_Document, createDocumentFragment, arginfo_class_DOM_Document_createDocumentFragment, ZEND_ACC_PUBLIC)
+ ZEND_ME(DOM_Document, createElement, arginfo_class_DOM_Document_createElement, ZEND_ACC_PUBLIC)
+ ZEND_ME(DOM_Document, createElementNS, arginfo_class_DOM_Document_createElementNS, ZEND_ACC_PUBLIC)
+ ZEND_ME(DOM_Document, createProcessingInstruction, arginfo_class_DOM_Document_createProcessingInstruction, ZEND_ACC_PUBLIC)
+ ZEND_ME(DOM_Document, createTextNode, arginfo_class_DOM_Document_createTextNode, ZEND_ACC_PUBLIC)
+ ZEND_ME(DOM_Document, getElementById, arginfo_class_DOM_Document_getElementById, ZEND_ACC_PUBLIC)
+ ZEND_ME(DOM_Document, getElementsByTagName, arginfo_class_DOM_Document_getElementsByTagName, ZEND_ACC_PUBLIC)
+ ZEND_ME(DOM_Document, getElementsByTagNameNS, arginfo_class_DOM_Document_getElementsByTagNameNS, ZEND_ACC_PUBLIC)
+ ZEND_ME(DOM_Document, importNode, arginfo_class_DOM_Document_importNode, ZEND_ACC_PUBLIC)
+ ZEND_ME(DOM_Document, normalizeDocument, arginfo_class_DOM_Document_normalizeDocument, ZEND_ACC_PUBLIC)
+ ZEND_ME(DOM_Document, registerNodeClass, arginfo_class_DOM_Document_registerNodeClass, ZEND_ACC_PUBLIC)
+#if defined(LIBXML_SCHEMAS_ENABLED)
+ ZEND_ME(DOM_Document, schemaValidate, arginfo_class_DOM_Document_schemaValidate, ZEND_ACC_PUBLIC)
+#endif
+#if defined(LIBXML_SCHEMAS_ENABLED)
+ ZEND_ME(DOM_Document, schemaValidateSource, arginfo_class_DOM_Document_schemaValidateSource, ZEND_ACC_PUBLIC)
+#endif
+#if defined(LIBXML_SCHEMAS_ENABLED)
+ ZEND_ME(DOM_Document, relaxNGValidate, arginfo_class_DOM_Document_relaxNGValidate, ZEND_ACC_PUBLIC)
+#endif
+#if defined(LIBXML_SCHEMAS_ENABLED)
+ ZEND_ME(DOM_Document, relaxNGValidateSource, arginfo_class_DOM_Document_relaxNGValidateSource, ZEND_ACC_PUBLIC)
+#endif
+ ZEND_ME(DOM_Document, adoptNode, arginfo_class_DOM_Document_adoptNode, ZEND_ACC_PUBLIC)
+ ZEND_MALIAS(DOMElement, append, append, arginfo_class_DOM_Document_append, ZEND_ACC_PUBLIC)
+ ZEND_MALIAS(DOMElement, prepend, prepend, arginfo_class_DOM_Document_prepend, ZEND_ACC_PUBLIC)
+ ZEND_ME(DOM_Document, replaceChildren, arginfo_class_DOM_Document_replaceChildren, ZEND_ACC_PUBLIC)
+ ZEND_FE_END
+};
+
+
static const zend_function_entry class_DOM_HTML5Document_methods[] = {
ZEND_ME(DOM_HTML5Document, __construct, arginfo_class_DOM_HTML5Document___construct, ZEND_ACC_PUBLIC)
ZEND_ME(DOM_HTML5Document, load, arginfo_class_DOM_HTML5Document_load, ZEND_ACC_PUBLIC)
@@ -1185,11 +1191,7 @@ static zend_class_entry *register_class_DOMNode(void)
INIT_CLASS_ENTRY(ce, "DOMNode", class_DOMNode_methods);
class_entry = zend_register_internal_class_ex(&ce, NULL);
-<<<<<<< HEAD
-=======
- class_entry->ce_flags |= ZEND_ACC_NOT_SERIALIZABLE;
zend_register_class_alias("DOM\\Node", class_entry);
->>>>>>> 6cdbadbf86 (Create class aliases)
zval const_DOCUMENT_POSITION_DISCONNECTED_value;
ZVAL_LONG(&const_DOCUMENT_POSITION_DISCONNECTED_value, 0x1);
@@ -1353,11 +1355,7 @@ static zend_class_entry *register_class_DOMNameSpaceNode(void)
INIT_CLASS_ENTRY(ce, "DOMNameSpaceNode", class_DOMNameSpaceNode_methods);
class_entry = zend_register_internal_class_ex(&ce, NULL);
-<<<<<<< HEAD
-=======
- class_entry->ce_flags |= ZEND_ACC_NOT_SERIALIZABLE;
zend_register_class_alias("DOM\\NameSpaceNode", class_entry);
->>>>>>> 6cdbadbf86 (Create class aliases)
zval property_nodeName_default_value;
ZVAL_UNDEF(&property_nodeName_default_value);
@@ -1636,34 +1634,12 @@ static zend_class_entry *register_class_DOMElement(zend_class_entry *class_entry
return class_entry;
}
-static zend_class_entry *register_class_DOMDocument(zend_class_entry *class_entry_DOMNode, zend_class_entry *class_entry_DOMParentNode)
+static zend_class_entry *register_class_DOMDocument(zend_class_entry *class_entry_DOM_Document)
{
zend_class_entry ce, *class_entry;
INIT_CLASS_ENTRY(ce, "DOMDocument", class_DOMDocument_methods);
- class_entry = zend_register_internal_class_ex(&ce, class_entry_DOMNode);
- zend_class_implements(class_entry, 1, class_entry_DOMParentNode);
-
- zval property_doctype_default_value;
- ZVAL_UNDEF(&property_doctype_default_value);
- zend_string *property_doctype_name = zend_string_init("doctype", sizeof("doctype") - 1, 1);
- zend_string *property_doctype_class_DOMDocumentType = zend_string_init("DOMDocumentType", sizeof("DOMDocumentType")-1, 1);
- zend_declare_typed_property(class_entry, property_doctype_name, &property_doctype_default_value, ZEND_ACC_PUBLIC, NULL, (zend_type) ZEND_TYPE_INIT_CLASS(property_doctype_class_DOMDocumentType, 0, MAY_BE_NULL));
- zend_string_release(property_doctype_name);
-
- zval property_implementation_default_value;
- ZVAL_UNDEF(&property_implementation_default_value);
- zend_string *property_implementation_name = zend_string_init("implementation", sizeof("implementation") - 1, 1);
- zend_string *property_implementation_class_DOMImplementation = zend_string_init("DOMImplementation", sizeof("DOMImplementation")-1, 1);
- zend_declare_typed_property(class_entry, property_implementation_name, &property_implementation_default_value, ZEND_ACC_PUBLIC, NULL, (zend_type) ZEND_TYPE_INIT_CLASS(property_implementation_class_DOMImplementation, 0, 0));
- zend_string_release(property_implementation_name);
-
- zval property_documentElement_default_value;
- ZVAL_UNDEF(&property_documentElement_default_value);
- zend_string *property_documentElement_name = zend_string_init("documentElement", sizeof("documentElement") - 1, 1);
- zend_string *property_documentElement_class_DOMElement = zend_string_init("DOMElement", sizeof("DOMElement")-1, 1);
- zend_declare_typed_property(class_entry, property_documentElement_name, &property_documentElement_default_value, ZEND_ACC_PUBLIC, NULL, (zend_type) ZEND_TYPE_INIT_CLASS(property_documentElement_class_DOMElement, 0, MAY_BE_NULL));
- zend_string_release(property_documentElement_name);
+ class_entry = zend_register_internal_class_ex(&ce, class_entry_DOM_Document);
zval property_actualEncoding_default_value;
ZVAL_UNDEF(&property_actualEncoding_default_value);
@@ -1671,12 +1647,6 @@ static zend_class_entry *register_class_DOMDocument(zend_class_entry *class_entr
zend_declare_typed_property(class_entry, property_actualEncoding_name, &property_actualEncoding_default_value, ZEND_ACC_PUBLIC, NULL, (zend_type) ZEND_TYPE_INIT_MASK(MAY_BE_STRING|MAY_BE_NULL));
zend_string_release(property_actualEncoding_name);
- zval property_encoding_default_value;
- ZVAL_UNDEF(&property_encoding_default_value);
- zend_string *property_encoding_name = zend_string_init("encoding", sizeof("encoding") - 1, 1);
- zend_declare_typed_property(class_entry, property_encoding_name, &property_encoding_default_value, ZEND_ACC_PUBLIC, NULL, (zend_type) ZEND_TYPE_INIT_MASK(MAY_BE_STRING|MAY_BE_NULL));
- zend_string_release(property_encoding_name);
-
zval property_xmlEncoding_default_value;
ZVAL_UNDEF(&property_xmlEncoding_default_value);
zend_string *property_xmlEncoding_name = zend_string_init("xmlEncoding", sizeof("xmlEncoding") - 1, 1);
@@ -1707,18 +1677,6 @@ static zend_class_entry *register_class_DOMDocument(zend_class_entry *class_entr
zend_declare_typed_property(class_entry, property_xmlVersion_name, &property_xmlVersion_default_value, ZEND_ACC_PUBLIC, NULL, (zend_type) ZEND_TYPE_INIT_MASK(MAY_BE_STRING|MAY_BE_NULL));
zend_string_release(property_xmlVersion_name);
- zval property_strictErrorChecking_default_value;
- ZVAL_UNDEF(&property_strictErrorChecking_default_value);
- zend_string *property_strictErrorChecking_name = zend_string_init("strictErrorChecking", sizeof("strictErrorChecking") - 1, 1);
- zend_declare_typed_property(class_entry, property_strictErrorChecking_name, &property_strictErrorChecking_default_value, ZEND_ACC_PUBLIC, NULL, (zend_type) ZEND_TYPE_INIT_MASK(MAY_BE_BOOL));
- zend_string_release(property_strictErrorChecking_name);
-
- zval property_documentURI_default_value;
- ZVAL_UNDEF(&property_documentURI_default_value);
- zend_string *property_documentURI_name = zend_string_init("documentURI", sizeof("documentURI") - 1, 1);
- zend_declare_typed_property(class_entry, property_documentURI_name, &property_documentURI_default_value, ZEND_ACC_PUBLIC, NULL, (zend_type) ZEND_TYPE_INIT_MASK(MAY_BE_STRING|MAY_BE_NULL));
- zend_string_release(property_documentURI_name);
-
zval property_config_default_value;
ZVAL_UNDEF(&property_config_default_value);
zend_string *property_config_name = zend_string_init("config", sizeof("config") - 1, 1);
@@ -1761,26 +1719,6 @@ static zend_class_entry *register_class_DOMDocument(zend_class_entry *class_entr
zend_declare_typed_property(class_entry, property_substituteEntities_name, &property_substituteEntities_default_value, ZEND_ACC_PUBLIC, NULL, (zend_type) ZEND_TYPE_INIT_MASK(MAY_BE_BOOL));
zend_string_release(property_substituteEntities_name);
- zval property_firstElementChild_default_value;
- ZVAL_UNDEF(&property_firstElementChild_default_value);
- zend_string *property_firstElementChild_name = zend_string_init("firstElementChild", sizeof("firstElementChild") - 1, 1);
- zend_string *property_firstElementChild_class_DOMElement = zend_string_init("DOMElement", sizeof("DOMElement")-1, 1);
- zend_declare_typed_property(class_entry, property_firstElementChild_name, &property_firstElementChild_default_value, ZEND_ACC_PUBLIC, NULL, (zend_type) ZEND_TYPE_INIT_CLASS(property_firstElementChild_class_DOMElement, 0, MAY_BE_NULL));
- zend_string_release(property_firstElementChild_name);
-
- zval property_lastElementChild_default_value;
- ZVAL_UNDEF(&property_lastElementChild_default_value);
- zend_string *property_lastElementChild_name = zend_string_init("lastElementChild", sizeof("lastElementChild") - 1, 1);
- zend_string *property_lastElementChild_class_DOMElement = zend_string_init("DOMElement", sizeof("DOMElement")-1, 1);
- zend_declare_typed_property(class_entry, property_lastElementChild_name, &property_lastElementChild_default_value, ZEND_ACC_PUBLIC, NULL, (zend_type) ZEND_TYPE_INIT_CLASS(property_lastElementChild_class_DOMElement, 0, MAY_BE_NULL));
- zend_string_release(property_lastElementChild_name);
-
- zval property_childElementCount_default_value;
- ZVAL_UNDEF(&property_childElementCount_default_value);
- zend_string *property_childElementCount_name = zend_string_init("childElementCount", sizeof("childElementCount") - 1, 1);
- zend_declare_typed_property(class_entry, property_childElementCount_name, &property_childElementCount_default_value, ZEND_ACC_PUBLIC, NULL, (zend_type) ZEND_TYPE_INIT_MASK(MAY_BE_LONG));
- zend_string_release(property_childElementCount_name);
-
return class_entry;
}
@@ -1968,6 +1906,77 @@ static zend_class_entry *register_class_DOMXPath(void)
}
#endif
+static zend_class_entry *register_class_DOM_Document(zend_class_entry *class_entry_DOM_DOMNode, zend_class_entry *class_entry_DOM_DOMParentNode)
+{ /* Registers the namespaced class DOM\Document, declares its typed public properties, and returns the new class entry. */
+ zend_class_entry ce, *class_entry;
+
+ INIT_NS_CLASS_ENTRY(ce, "DOM", "Document", class_DOM_Document_methods);
+ class_entry = zend_register_internal_class_ex(&ce, class_entry_DOM_DOMNode); /* parent class is the entry passed as the first argument */
+ class_entry->ce_flags |= ZEND_ACC_ABSTRACT; /* the class is flagged abstract */
+ zend_class_implements(class_entry, 1, class_entry_DOM_DOMParentNode); /* implements exactly one interface: the entry passed as the second argument */
+
+ zval property_doctype_default_value;
+ ZVAL_UNDEF(&property_doctype_default_value); /* default value is left undefined (same pattern for every property below) */
+ zend_string *property_doctype_name = zend_string_init("doctype", sizeof("doctype") - 1, 1);
+ zend_string *property_doctype_class_DOM_DocumentType = zend_string_init("DOM\\DocumentType", sizeof("DOM\\DocumentType")-1, 1);
+ zend_declare_typed_property(class_entry, property_doctype_name, &property_doctype_default_value, ZEND_ACC_PUBLIC, NULL, (zend_type) ZEND_TYPE_INIT_CLASS(property_doctype_class_DOM_DocumentType, 0, MAY_BE_NULL)); /* type: DOM\DocumentType or null */
+ zend_string_release(property_doctype_name); /* NOTE(review): only the name string is released; the class-name string is presumably owned by the declared type - confirm */
+
+ zval property_implementation_default_value;
+ ZVAL_UNDEF(&property_implementation_default_value);
+ zend_string *property_implementation_name = zend_string_init("implementation", sizeof("implementation") - 1, 1);
+ zend_string *property_implementation_class_DOM_Implementation = zend_string_init("DOM\\Implementation", sizeof("DOM\\Implementation")-1, 1);
+ zend_declare_typed_property(class_entry, property_implementation_name, &property_implementation_default_value, ZEND_ACC_PUBLIC, NULL, (zend_type) ZEND_TYPE_INIT_CLASS(property_implementation_class_DOM_Implementation, 0, 0)); /* type: DOM\Implementation, no extra type mask (not nullable) */
+ zend_string_release(property_implementation_name);
+
+ zval property_documentElement_default_value;
+ ZVAL_UNDEF(&property_documentElement_default_value);
+ zend_string *property_documentElement_name = zend_string_init("documentElement", sizeof("documentElement") - 1, 1);
+ zend_string *property_documentElement_class_DOM_Element = zend_string_init("DOM\\Element", sizeof("DOM\\Element")-1, 1);
+ zend_declare_typed_property(class_entry, property_documentElement_name, &property_documentElement_default_value, ZEND_ACC_PUBLIC, NULL, (zend_type) ZEND_TYPE_INIT_CLASS(property_documentElement_class_DOM_Element, 0, MAY_BE_NULL)); /* type: DOM\Element or null */
+ zend_string_release(property_documentElement_name);
+
+ zval property_encoding_default_value;
+ ZVAL_UNDEF(&property_encoding_default_value);
+ zend_string *property_encoding_name = zend_string_init("encoding", sizeof("encoding") - 1, 1);
+ zend_declare_typed_property(class_entry, property_encoding_name, &property_encoding_default_value, ZEND_ACC_PUBLIC, NULL, (zend_type) ZEND_TYPE_INIT_MASK(MAY_BE_STRING|MAY_BE_NULL)); /* type: string or null */
+ zend_string_release(property_encoding_name);
+
+ zval property_strictErrorChecking_default_value;
+ ZVAL_UNDEF(&property_strictErrorChecking_default_value);
+ zend_string *property_strictErrorChecking_name = zend_string_init("strictErrorChecking", sizeof("strictErrorChecking") - 1, 1);
+ zend_declare_typed_property(class_entry, property_strictErrorChecking_name, &property_strictErrorChecking_default_value, ZEND_ACC_PUBLIC, NULL, (zend_type) ZEND_TYPE_INIT_MASK(MAY_BE_BOOL)); /* type: bool */
+ zend_string_release(property_strictErrorChecking_name);
+
+ zval property_documentURI_default_value;
+ ZVAL_UNDEF(&property_documentURI_default_value);
+ zend_string *property_documentURI_name = zend_string_init("documentURI", sizeof("documentURI") - 1, 1);
+ zend_declare_typed_property(class_entry, property_documentURI_name, &property_documentURI_default_value, ZEND_ACC_PUBLIC, NULL, (zend_type) ZEND_TYPE_INIT_MASK(MAY_BE_STRING|MAY_BE_NULL)); /* type: string or null */
+ zend_string_release(property_documentURI_name);
+
+ zval property_firstElementChild_default_value;
+ ZVAL_UNDEF(&property_firstElementChild_default_value);
+ zend_string *property_firstElementChild_name = zend_string_init("firstElementChild", sizeof("firstElementChild") - 1, 1);
+ zend_string *property_firstElementChild_class_DOM_Element = zend_string_init("DOM\\Element", sizeof("DOM\\Element")-1, 1);
+ zend_declare_typed_property(class_entry, property_firstElementChild_name, &property_firstElementChild_default_value, ZEND_ACC_PUBLIC, NULL, (zend_type) ZEND_TYPE_INIT_CLASS(property_firstElementChild_class_DOM_Element, 0, MAY_BE_NULL)); /* type: DOM\Element or null */
+ zend_string_release(property_firstElementChild_name);
+
+ zval property_lastElementChild_default_value;
+ ZVAL_UNDEF(&property_lastElementChild_default_value);
+ zend_string *property_lastElementChild_name = zend_string_init("lastElementChild", sizeof("lastElementChild") - 1, 1);
+ zend_string *property_lastElementChild_class_DOM_Element = zend_string_init("DOM\\Element", sizeof("DOM\\Element")-1, 1);
+ zend_declare_typed_property(class_entry, property_lastElementChild_name, &property_lastElementChild_default_value, ZEND_ACC_PUBLIC, NULL, (zend_type) ZEND_TYPE_INIT_CLASS(property_lastElementChild_class_DOM_Element, 0, MAY_BE_NULL)); /* type: DOM\Element or null */
+ zend_string_release(property_lastElementChild_name);
+
+ zval property_childElementCount_default_value;
+ ZVAL_UNDEF(&property_childElementCount_default_value);
+ zend_string *property_childElementCount_name = zend_string_init("childElementCount", sizeof("childElementCount") - 1, 1);
+ zend_declare_typed_property(class_entry, property_childElementCount_name, &property_childElementCount_default_value, ZEND_ACC_PUBLIC, NULL, (zend_type) ZEND_TYPE_INIT_MASK(MAY_BE_LONG)); /* type: int */
+ zend_string_release(property_childElementCount_name);
+
+ return class_entry;
+}
+
static zend_class_entry *register_class_DOM_HTML5Document(zend_class_entry *class_entry_DOMDocument)
{
zend_class_entry ce, *class_entry;
diff --git a/ext/dom/tests/DOMDocument_adoptNode.phpt b/ext/dom/tests/DOMDocument_adoptNode.phpt
index 2382cabd5136f..81fe4c564971a 100644
--- a/ext/dom/tests/DOMDocument_adoptNode.phpt
+++ b/ext/dom/tests/DOMDocument_adoptNode.phpt
@@ -141,7 +141,7 @@ string(27) "
Not Supported Error
-- Adopt a document (strict error off) --
-Warning: DOMDocument::adoptNode(): Not Supported Error in %s on line %d
+Warning: DOM\Document::adoptNode(): Not Supported Error in %s on line %d
-- Adopt an attribute --
bool(true)
bool(true)
diff --git a/ext/dom/tests/DOMDocument_relaxNGValidateSource_error1.phpt b/ext/dom/tests/DOMDocument_relaxNGValidateSource_error1.phpt
index fd476749fe20e..3baa24c50c65f 100644
--- a/ext/dom/tests/DOMDocument_relaxNGValidateSource_error1.phpt
+++ b/ext/dom/tests/DOMDocument_relaxNGValidateSource_error1.phpt
@@ -35,5 +35,5 @@ var_dump($result);
?>
--EXPECTF--
-Warning: DOMDocument::relaxNGValidateSource(): Did not expect element pear there in %s on line %d
+Warning: DOM\Document::relaxNGValidateSource(): Did not expect element pear there in %s on line %d
bool(false)
diff --git a/ext/dom/tests/DOMDocument_relaxNGValidateSource_error2.phpt b/ext/dom/tests/DOMDocument_relaxNGValidateSource_error2.phpt
index da00943c7caf2..76aa3a04939d7 100644
--- a/ext/dom/tests/DOMDocument_relaxNGValidateSource_error2.phpt
+++ b/ext/dom/tests/DOMDocument_relaxNGValidateSource_error2.phpt
@@ -31,7 +31,7 @@ var_dump($result);
?>
--EXPECTF--
-Warning: DOMDocument::relaxNGValidateSource(): xmlRelaxNGParseElement: element has no content in %s on line %d
+Warning: DOM\Document::relaxNGValidateSource(): xmlRelaxNGParseElement: element has no content in %s on line %d
-Warning: DOMDocument::relaxNGValidateSource(): Invalid RelaxNG in %s on line %d
+Warning: DOM\Document::relaxNGValidateSource(): Invalid RelaxNG in %s on line %d
bool(false)
diff --git a/ext/dom/tests/DOMDocument_relaxNGValidate_error1.phpt b/ext/dom/tests/DOMDocument_relaxNGValidate_error1.phpt
index aa38ca5c1ed26..aff2680b3daee 100644
--- a/ext/dom/tests/DOMDocument_relaxNGValidate_error1.phpt
+++ b/ext/dom/tests/DOMDocument_relaxNGValidate_error1.phpt
@@ -20,5 +20,5 @@ $result = $doc->relaxNGValidate($rng);
var_dump($result);
?>
--EXPECTF--
-Warning: DOMDocument::relaxNGValidate(): Did not expect element pear there in %s on line %d
+Warning: DOM\Document::relaxNGValidate(): Did not expect element pear there in %s on line %d
bool(false)
diff --git a/ext/dom/tests/DOMDocument_relaxNGValidate_error2.phpt b/ext/dom/tests/DOMDocument_relaxNGValidate_error2.phpt
index 1ad46e014a0a7..87d4c3317bd29 100644
--- a/ext/dom/tests/DOMDocument_relaxNGValidate_error2.phpt
+++ b/ext/dom/tests/DOMDocument_relaxNGValidate_error2.phpt
@@ -20,9 +20,9 @@ $result = $doc->relaxNGValidate($rng);
var_dump($result);
?>
--EXPECTF--
-Warning: DOMDocument::relaxNGValidate(): I/O warning : failed to load external entity "%s/foo.rng" in %s on line %d
+Warning: DOM\Document::relaxNGValidate(): I/O warning : failed to load external entity "%s/foo.rng" in %s on line %d
-Warning: DOMDocument::relaxNGValidate(): xmlRelaxNGParse: could not load %s/foo.rng in %s on line %d
+Warning: DOM\Document::relaxNGValidate(): xmlRelaxNGParse: could not load %s/foo.rng in %s on line %d
-Warning: DOMDocument::relaxNGValidate(): Invalid RelaxNG in %s on line %d
+Warning: DOM\Document::relaxNGValidate(): Invalid RelaxNG in %s on line %d
bool(false)
diff --git a/ext/dom/tests/DOMDocument_schemaValidateSource_error1.phpt b/ext/dom/tests/DOMDocument_schemaValidateSource_error1.phpt
index 10b5d07664d54..f71d192a616ed 100644
--- a/ext/dom/tests/DOMDocument_schemaValidateSource_error1.phpt
+++ b/ext/dom/tests/DOMDocument_schemaValidateSource_error1.phpt
@@ -17,13 +17,13 @@ var_dump($result);
?>
--EXPECTF--
-Warning: DOMDocument::schemaValidateSource(): Entity: line 1: parser error : Start tag expected, '<' not found in %s.php on line %d
+Warning: DOM\Document::schemaValidateSource(): Entity: line 1: parser error : Start tag expected, '<' not found in %s on line %d
-Warning: DOMDocument::schemaValidateSource(): string that is not a schema in %s.php on line %d
+Warning: DOM\Document::schemaValidateSource(): string that is not a schema in %s on line %d
-Warning: DOMDocument::schemaValidateSource(): ^ in %s.php on line %d
+Warning: DOM\Document::schemaValidateSource(): ^ in %s on line %d
-Warning: DOMDocument::schemaValidateSource(): Failed to parse the XML resource 'in_memory_buffer'. in %s.php on line %d
+Warning: DOM\Document::schemaValidateSource(): Failed to parse the XML resource 'in_memory_buffer'. in %s on line %d
-Warning: DOMDocument::schemaValidateSource(): Invalid Schema in %s.php on line %d
+Warning: DOM\Document::schemaValidateSource(): Invalid Schema in %s on line %d
bool(false)
diff --git a/ext/dom/tests/DOMDocument_schemaValidateSource_error2.phpt b/ext/dom/tests/DOMDocument_schemaValidateSource_error2.phpt
index d099afa779c60..b9fea75807a47 100644
--- a/ext/dom/tests/DOMDocument_schemaValidateSource_error2.phpt
+++ b/ext/dom/tests/DOMDocument_schemaValidateSource_error2.phpt
@@ -19,5 +19,5 @@ var_dump($result);
?>
--EXPECTF--
-Warning: DOMDocument::schemaValidateSource(): Element 'books': No matching global declaration available for the validation root. in %s.php on line %d
+Warning: DOM\Document::schemaValidateSource(): Element 'books': No matching global declaration available for the validation root. in %s on line %d
bool(false)
diff --git a/ext/dom/tests/DOMDocument_schemaValidateSource_error3.phpt b/ext/dom/tests/DOMDocument_schemaValidateSource_error3.phpt
index ec295a55e3391..bdeb74ff6c2c9 100644
--- a/ext/dom/tests/DOMDocument_schemaValidateSource_error3.phpt
+++ b/ext/dom/tests/DOMDocument_schemaValidateSource_error3.phpt
@@ -20,4 +20,4 @@ try {
?>
--EXPECT--
-DOMDocument::schemaValidateSource(): Argument #1 ($source) must not be empty
+DOM\Document::schemaValidateSource(): Argument #1 ($source) must not be empty
diff --git a/ext/dom/tests/DOMDocument_schemaValidate_error1.phpt b/ext/dom/tests/DOMDocument_schemaValidate_error1.phpt
index baa8debbdb881..b860007758caa 100644
--- a/ext/dom/tests/DOMDocument_schemaValidate_error1.phpt
+++ b/ext/dom/tests/DOMDocument_schemaValidate_error1.phpt
@@ -17,13 +17,13 @@ var_dump($result);
?>
--EXPECTF--
-Warning: DOMDocument::schemaValidate(): %sbook-not-a-schema.xsd:1: parser error : Start tag expected, '<' not found in %s.php on line %d
+Warning: DOM\Document::schemaValidate(): %sbook-not-a-schema.xsd:1: parser error : Start tag expected, '<' not found in %s on line %d
-Warning: DOMDocument::schemaValidate(): Let's see what happens upon parsing a file that doesn't contain a schema. in %s.php on line %d
+Warning: DOM\Document::schemaValidate(): Let's see what happens upon parsing a file that doesn't contain a schema. in %s on line %d
-Warning: DOMDocument::schemaValidate(): ^ in %s.php on line %d
+Warning: DOM\Document::schemaValidate(): ^ in %s on line %d
-Warning: DOMDocument::schemaValidate(): Failed to parse the XML resource '%sbook-not-a-schema.xsd'. in %s.php on line %d
+Warning: DOM\Document::schemaValidate(): Failed to parse the XML resource '%sbook-not-a-schema.xsd'. in %s on line %d
-Warning: DOMDocument::schemaValidate(): Invalid Schema in %s.php on line %d
+Warning: DOM\Document::schemaValidate(): Invalid Schema in %s on line %d
bool(false)
diff --git a/ext/dom/tests/DOMDocument_schemaValidate_error2.phpt b/ext/dom/tests/DOMDocument_schemaValidate_error2.phpt
index 3c4f6f4ff5ed5..ddc491c70ca3f 100644
--- a/ext/dom/tests/DOMDocument_schemaValidate_error2.phpt
+++ b/ext/dom/tests/DOMDocument_schemaValidate_error2.phpt
@@ -17,5 +17,5 @@ var_dump($result);
?>
--EXPECTF--
-Warning: DOMDocument::schemaValidate(): Element 'books': No matching global declaration available for the validation root. in %s.php on line %d
+Warning: DOM\Document::schemaValidate(): Element 'books': No matching global declaration available for the validation root. in %s on line %d
bool(false)
diff --git a/ext/dom/tests/DOMDocument_schemaValidate_error3.phpt b/ext/dom/tests/DOMDocument_schemaValidate_error3.phpt
index 274463e62e139..d48ed3d1963b2 100644
--- a/ext/dom/tests/DOMDocument_schemaValidate_error3.phpt
+++ b/ext/dom/tests/DOMDocument_schemaValidate_error3.phpt
@@ -20,4 +20,4 @@ try {
?>
--EXPECT--
-DOMDocument::schemaValidate(): Argument #1 ($filename) must not be empty
+DOM\Document::schemaValidate(): Argument #1 ($filename) must not be empty
diff --git a/ext/dom/tests/DOMDocument_schemaValidate_error5.phpt b/ext/dom/tests/DOMDocument_schemaValidate_error5.phpt
index 2feda5d1e1f8d..888753302847d 100644
--- a/ext/dom/tests/DOMDocument_schemaValidate_error5.phpt
+++ b/ext/dom/tests/DOMDocument_schemaValidate_error5.phpt
@@ -17,9 +17,9 @@ var_dump($result);
?>
--EXPECTF--
-Warning: DOMDocument::schemaValidate(): I/O warning : failed to load external entity "%snon-existent-file" in %s.php on line %d
+Warning: DOM\Document::schemaValidate(): I/O warning : failed to load external entity "%snon-existent-file" in %s on line %d
-Warning: DOMDocument::schemaValidate(): Failed to locate the main schema resource at '%s/non-existent-file'. in %s.php on line %d
+Warning: DOM\Document::schemaValidate(): Failed to locate the main schema resource at '%s/non-existent-file'. in %s on line %d
-Warning: DOMDocument::schemaValidate(): Invalid Schema in %s.php on line %d
+Warning: DOM\Document::schemaValidate(): Invalid Schema in %s on line %d
bool(false)
diff --git a/ext/dom/tests/DOMDocument_schemaValidate_error6.phpt b/ext/dom/tests/DOMDocument_schemaValidate_error6.phpt
index c5d99b20a3fec..3668921264cf2 100644
--- a/ext/dom/tests/DOMDocument_schemaValidate_error6.phpt
+++ b/ext/dom/tests/DOMDocument_schemaValidate_error6.phpt
@@ -19,7 +19,7 @@ var_dump($doc->schemaValidate(str_repeat(" ", PHP_MAXPATHLEN + 1)));
?>
--EXPECTF--
-DOMDocument::schemaValidate(): Argument #1 ($filename) must not contain any null bytes
+DOM\Document::schemaValidate(): Argument #1 ($filename) must not contain any null bytes
-Warning: DOMDocument::schemaValidate(): Invalid Schema file source in %s on line %d
+Warning: DOM\Document::schemaValidate(): Invalid Schema file source in %s on line %d
bool(false)
diff --git a/ext/dom/tests/DOMDocument_strictErrorChecking_variation.phpt b/ext/dom/tests/DOMDocument_strictErrorChecking_variation.phpt
index 60027f5117d93..3def63da9b5a9 100644
--- a/ext/dom/tests/DOMDocument_strictErrorChecking_variation.phpt
+++ b/ext/dom/tests/DOMDocument_strictErrorChecking_variation.phpt
@@ -56,4 +56,4 @@ See if strictErrorChecking is off
bool(false)
Should raise PHP error because strictErrorChecking is off
-Warning: DOMDocument::createAttribute(): Invalid Character Error in %sDOMDocument_strictErrorChecking_variation.php on line %d
+Warning: DOM\Document::createAttribute(): Invalid Character Error in %s on line %d
diff --git a/ext/dom/tests/HTML5/interactions/Document_registerNodeClass_02.phpt b/ext/dom/tests/HTML5/interactions/Document_registerNodeClass_02.phpt
index 61566a97772ec..c4430b49f479a 100644
--- a/ext/dom/tests/HTML5/interactions/Document_registerNodeClass_02.phpt
+++ b/ext/dom/tests/HTML5/interactions/Document_registerNodeClass_02.phpt
@@ -10,8 +10,8 @@ $dom->registerNodeClass("DOM\\HTML5Document", "DOMDocument");
?>
--EXPECTF--
-Fatal error: Uncaught Error: DOMDocument::registerNodeClass(): Argument #2 ($extendedClass) must be a class name derived from DOM\HTML5Document or null, DOMDocument given in %s:%d
+Fatal error: Uncaught Error: DOM\Document::registerNodeClass(): Argument #2 ($extendedClass) must be a class name derived from DOM\HTML5Document or null, DOMDocument given in %s:%d
Stack trace:
-#0 %s(%d): DOMDocument->registerNodeClass('DOM\\HTML5Docume...', 'DOMDocument')
+#0 %s(%d): DOM\Document->registerNodeClass('DOM\\HTML5Docume...', 'DOMDocument')
#1 {main}
thrown in %s on line %d
From feca60b0017b06cd55c2b34c08906029b6724e62 Mon Sep 17 00:00:00 2001
From: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
Date: Sun, 17 Sep 2023 15:51:48 +0200
Subject: [PATCH 09/53] Register prop handlers in common base class
---
ext/dom/php_dom.c | 27 +++++++++++--------
.../Document_node_ownerDocument_for_XML.phpt | 20 +++++++-------
...should_retain_properties_and_owner_01.phpt | 20 +++++++-------
...should_retain_properties_and_owner_02.phpt | 20 +++++++-------
ext/dom/tests/domobject_debug_handler.phpt | 20 +++++++-------
5 files changed, 56 insertions(+), 51 deletions(-)
diff --git a/ext/dom/php_dom.c b/ext/dom/php_dom.c
index 8b302dc0f629d..06063ba9d33f7 100644
--- a/ext/dom/php_dom.c
+++ b/ext/dom/php_dom.c
@@ -698,14 +698,23 @@ PHP_MINIT_FUNCTION(dom)
zend_hash_add_ptr(&classes, dom_documentfragment_class_entry->name, &dom_documentfragment_prop_handlers);
dom_abstract_base_document_class_entry = register_class_DOM_Document(dom_node_class_entry, dom_parentnode_class_entry);
- // TODO: prop handlers
+ /* No need to set create_object as it's abstract. */
+ HashTable dom_abstract_base_document_prop_handlers;
+ zend_hash_init(&dom_abstract_base_document_prop_handlers, 0, NULL, dom_dtor_prop_handler, 1);
+ dom_register_prop_handler(&dom_abstract_base_document_prop_handlers, "doctype", sizeof("doctype")-1, dom_document_doctype_read, NULL);
+ dom_register_prop_handler(&dom_abstract_base_document_prop_handlers, "implementation", sizeof("implementation")-1, dom_document_implementation_read, NULL);
+ dom_register_prop_handler(&dom_abstract_base_document_prop_handlers, "documentElement", sizeof("documentElement")-1, dom_document_document_element_read, NULL);
+ dom_register_prop_handler(&dom_abstract_base_document_prop_handlers, "strictErrorChecking", sizeof("strictErrorChecking")-1, dom_document_strict_error_checking_read, dom_document_strict_error_checking_write);
+ dom_register_prop_handler(&dom_abstract_base_document_prop_handlers, "documentURI", sizeof("documentURI")-1, dom_document_document_uri_read, dom_document_document_uri_write);
+ dom_register_prop_handler(&dom_abstract_base_document_prop_handlers, "firstElementChild", sizeof("firstElementChild")-1, dom_parent_node_first_element_child_read, NULL);
+ dom_register_prop_handler(&dom_abstract_base_document_prop_handlers, "lastElementChild", sizeof("lastElementChild")-1, dom_parent_node_last_element_child_read, NULL);
+ dom_register_prop_handler(&dom_abstract_base_document_prop_handlers, "childElementCount", sizeof("childElementCount")-1, dom_parent_node_child_element_count, NULL);
+ zend_hash_merge(&dom_abstract_base_document_prop_handlers, &dom_node_prop_handlers, dom_copy_prop_handler, 0);
+ /* No need to register in &classes, because this is only used for merging. This is destroyed down below. */
dom_document_class_entry = register_class_DOMDocument(dom_abstract_base_document_class_entry);
dom_document_class_entry->create_object = dom_objects_new;
zend_hash_init(&dom_document_prop_handlers, 0, NULL, dom_dtor_prop_handler, 1);
- dom_register_prop_handler(&dom_document_prop_handlers, "doctype", sizeof("doctype")-1, dom_document_doctype_read, NULL);
- dom_register_prop_handler(&dom_document_prop_handlers, "implementation", sizeof("implementation")-1, dom_document_implementation_read, NULL);
- dom_register_prop_handler(&dom_document_prop_handlers, "documentElement", sizeof("documentElement")-1, dom_document_document_element_read, NULL);
dom_register_prop_handler(&dom_document_prop_handlers, "actualEncoding", sizeof("actualEncoding")-1, dom_document_encoding_read, NULL);
dom_register_prop_handler(&dom_document_prop_handlers, "encoding", sizeof("encoding")-1, dom_document_encoding_read, dom_document_encoding_write);
dom_register_prop_handler(&dom_document_prop_handlers, "xmlEncoding", sizeof("xmlEncoding")-1, dom_document_encoding_read, NULL);
@@ -713,8 +722,6 @@ PHP_MINIT_FUNCTION(dom)
dom_register_prop_handler(&dom_document_prop_handlers, "xmlStandalone", sizeof("xmlStandalone")-1, dom_document_standalone_read, dom_document_standalone_write);
dom_register_prop_handler(&dom_document_prop_handlers, "version", sizeof("version")-1, dom_document_version_read, dom_document_version_write);
dom_register_prop_handler(&dom_document_prop_handlers, "xmlVersion", sizeof("xmlVersion")-1, dom_document_version_read, dom_document_version_write);
- dom_register_prop_handler(&dom_document_prop_handlers, "strictErrorChecking", sizeof("strictErrorChecking")-1, dom_document_strict_error_checking_read, dom_document_strict_error_checking_write);
- dom_register_prop_handler(&dom_document_prop_handlers, "documentURI", sizeof("documentURI")-1, dom_document_document_uri_read, dom_document_document_uri_write);
dom_register_prop_handler(&dom_document_prop_handlers, "config", sizeof("config")-1, dom_document_config_read, NULL);
dom_register_prop_handler(&dom_document_prop_handlers, "formatOutput", sizeof("formatOutput")-1, dom_document_format_output_read, dom_document_format_output_write);
dom_register_prop_handler(&dom_document_prop_handlers, "validateOnParse", sizeof("validateOnParse")-1, dom_document_validate_on_parse_read, dom_document_validate_on_parse_write);
@@ -723,11 +730,7 @@ PHP_MINIT_FUNCTION(dom)
dom_register_prop_handler(&dom_document_prop_handlers, "recover", sizeof("recover")-1, dom_document_recover_read, dom_document_recover_write);
dom_register_prop_handler(&dom_document_prop_handlers, "substituteEntities", sizeof("substituteEntities")-1, dom_document_substitue_entities_read, dom_document_substitue_entities_write);
- dom_register_prop_handler(&dom_document_prop_handlers, "firstElementChild", sizeof("firstElementChild")-1, dom_parent_node_first_element_child_read, NULL);
- dom_register_prop_handler(&dom_document_prop_handlers, "lastElementChild", sizeof("lastElementChild")-1, dom_parent_node_last_element_child_read, NULL);
- dom_register_prop_handler(&dom_document_prop_handlers, "childElementCount", sizeof("childElementCount")-1, dom_parent_node_child_element_count, NULL);
-
- zend_hash_merge(&dom_document_prop_handlers, &dom_node_prop_handlers, dom_copy_prop_handler, 0);
+ zend_hash_merge(&dom_document_prop_handlers, &dom_abstract_base_document_prop_handlers, dom_copy_prop_handler, 0);
zend_hash_add_ptr(&classes, dom_document_class_entry->name, &dom_document_prop_handlers);
dom_html5_document_class_entry = register_class_DOM_HTML5Document(dom_document_class_entry);
@@ -737,6 +740,8 @@ PHP_MINIT_FUNCTION(dom)
zend_hash_merge(&dom_html5_document_prop_handlers, &dom_document_prop_handlers, dom_copy_prop_handler, 0);
zend_hash_add_ptr(&classes, dom_html5_document_class_entry->name, &dom_html5_document_prop_handlers);
+ zend_hash_destroy(&dom_abstract_base_document_prop_handlers);
+
dom_nodelist_class_entry = register_class_DOMNodeList(zend_ce_aggregate, zend_ce_countable);
dom_nodelist_class_entry->create_object = dom_nnodemap_objects_new;
dom_nodelist_class_entry->default_object_handlers = &dom_nodelist_object_handlers;
diff --git a/ext/dom/tests/HTML5/interactions/Document_node_ownerDocument_for_XML.phpt b/ext/dom/tests/HTML5/interactions/Document_node_ownerDocument_for_XML.phpt
index a5b027113063a..9f083c0ca42cc 100644
--- a/ext/dom/tests/HTML5/interactions/Document_node_ownerDocument_for_XML.phpt
+++ b/ext/dom/tests/HTML5/interactions/Document_node_ownerDocument_for_XML.phpt
@@ -17,12 +17,6 @@ var_dump($element->ownerDocument);
object(DOM\HTML5Document)#1 (40) {
["encoding"]=>
NULL
- ["doctype"]=>
- NULL
- ["implementation"]=>
- string(22) "(object value omitted)"
- ["documentElement"]=>
- string(22) "(object value omitted)"
["actualEncoding"]=>
NULL
["xmlEncoding"]=>
@@ -35,10 +29,6 @@ object(DOM\HTML5Document)#1 (40) {
string(3) "1.0"
["xmlVersion"]=>
string(3) "1.0"
- ["strictErrorChecking"]=>
- bool(true)
- ["documentURI"]=>
- string(%d) %s
["config"]=>
NULL
["formatOutput"]=>
@@ -53,6 +43,16 @@ object(DOM\HTML5Document)#1 (40) {
bool(false)
["substituteEntities"]=>
bool(false)
+ ["doctype"]=>
+ NULL
+ ["implementation"]=>
+ string(22) "(object value omitted)"
+ ["documentElement"]=>
+ string(22) "(object value omitted)"
+ ["strictErrorChecking"]=>
+ bool(true)
+ ["documentURI"]=>
+ string(%d) "%s"
["firstElementChild"]=>
string(22) "(object value omitted)"
["lastElementChild"]=>
diff --git a/ext/dom/tests/HTML5/interactions/Document_should_retain_properties_and_owner_01.phpt b/ext/dom/tests/HTML5/interactions/Document_should_retain_properties_and_owner_01.phpt
index 6f6686384ead3..0d8a36ff3778c 100644
--- a/ext/dom/tests/HTML5/interactions/Document_should_retain_properties_and_owner_01.phpt
+++ b/ext/dom/tests/HTML5/interactions/Document_should_retain_properties_and_owner_01.phpt
@@ -25,12 +25,6 @@ var_dump($dom->strictErrorChecking);
object(DOM\HTML5Document)#1 (40) {
["encoding"]=>
string(5) "UTF-8"
- ["doctype"]=>
- NULL
- ["implementation"]=>
- string(22) "(object value omitted)"
- ["documentElement"]=>
- string(22) "(object value omitted)"
["actualEncoding"]=>
string(5) "UTF-8"
["xmlEncoding"]=>
@@ -43,10 +37,6 @@ object(DOM\HTML5Document)#1 (40) {
NULL
["xmlVersion"]=>
NULL
- ["strictErrorChecking"]=>
- bool(false)
- ["documentURI"]=>
- NULL
["config"]=>
NULL
["formatOutput"]=>
@@ -61,6 +51,16 @@ object(DOM\HTML5Document)#1 (40) {
bool(false)
["substituteEntities"]=>
bool(false)
+ ["doctype"]=>
+ NULL
+ ["implementation"]=>
+ string(22) "(object value omitted)"
+ ["documentElement"]=>
+ string(22) "(object value omitted)"
+ ["strictErrorChecking"]=>
+ bool(false)
+ ["documentURI"]=>
+ NULL
["firstElementChild"]=>
string(22) "(object value omitted)"
["lastElementChild"]=>
diff --git a/ext/dom/tests/HTML5/interactions/Document_should_retain_properties_and_owner_02.phpt b/ext/dom/tests/HTML5/interactions/Document_should_retain_properties_and_owner_02.phpt
index 676c127e167d3..268f52b81c483 100644
--- a/ext/dom/tests/HTML5/interactions/Document_should_retain_properties_and_owner_02.phpt
+++ b/ext/dom/tests/HTML5/interactions/Document_should_retain_properties_and_owner_02.phpt
@@ -24,12 +24,6 @@ var_dump($dom->strictErrorChecking);
object(DOM\HTML5Document)#1 (40) {
["encoding"]=>
NULL
- ["doctype"]=>
- NULL
- ["implementation"]=>
- string(22) "(object value omitted)"
- ["documentElement"]=>
- string(22) "(object value omitted)"
["actualEncoding"]=>
NULL
["xmlEncoding"]=>
@@ -42,10 +36,6 @@ object(DOM\HTML5Document)#1 (40) {
string(3) "1.0"
["xmlVersion"]=>
string(3) "1.0"
- ["strictErrorChecking"]=>
- bool(false)
- ["documentURI"]=>
- NULL
["config"]=>
NULL
["formatOutput"]=>
@@ -60,6 +50,16 @@ object(DOM\HTML5Document)#1 (40) {
bool(false)
["substituteEntities"]=>
bool(false)
+ ["doctype"]=>
+ NULL
+ ["implementation"]=>
+ string(22) "(object value omitted)"
+ ["documentElement"]=>
+ string(22) "(object value omitted)"
+ ["strictErrorChecking"]=>
+ bool(false)
+ ["documentURI"]=>
+ NULL
["firstElementChild"]=>
string(22) "(object value omitted)"
["lastElementChild"]=>
diff --git a/ext/dom/tests/domobject_debug_handler.phpt b/ext/dom/tests/domobject_debug_handler.phpt
index 98a9a72315ac1..a6e04a58d0bc5 100644
--- a/ext/dom/tests/domobject_debug_handler.phpt
+++ b/ext/dom/tests/domobject_debug_handler.phpt
@@ -20,12 +20,6 @@ object(DOMDocument)#1 (41) {
["dynamicProperty"]=>
object(stdClass)#2 (0) {
}
- ["doctype"]=>
- NULL
- ["implementation"]=>
- string(22) "(object value omitted)"
- ["documentElement"]=>
- string(22) "(object value omitted)"
["actualEncoding"]=>
NULL
["encoding"]=>
@@ -40,10 +34,6 @@ object(DOMDocument)#1 (41) {
string(3) "1.0"
["xmlVersion"]=>
string(3) "1.0"
- ["strictErrorChecking"]=>
- bool(true)
- ["documentURI"]=>
- string(%d) %s
["config"]=>
NULL
["formatOutput"]=>
@@ -58,6 +48,16 @@ object(DOMDocument)#1 (41) {
bool(false)
["substituteEntities"]=>
bool(false)
+ ["doctype"]=>
+ NULL
+ ["implementation"]=>
+ string(22) "(object value omitted)"
+ ["documentElement"]=>
+ string(22) "(object value omitted)"
+ ["strictErrorChecking"]=>
+ bool(true)
+ ["documentURI"]=>
+ string(%d) "%s"
["firstElementChild"]=>
string(22) "(object value omitted)"
["lastElementChild"]=>
From f150f32b9476751cac1ea1ddb9ad00a128e12b29 Mon Sep 17 00:00:00 2001
From: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
Date: Sun, 17 Sep 2023 19:40:44 +0200
Subject: [PATCH 10/53] Implement the new class hierarchy instead of
HTML5Document extends DOMDocument
---
ext/dom/config.m4 | 2 +-
ext/dom/config.w32 | 2 +-
ext/dom/document.c | 60 ++---
ext/dom/dom_ce.h | 3 +-
ext/dom/dom_properties.h | 2 +-
ext/dom/{html5_document.c => html_document.c} | 172 +++++--------
ext/dom/php_dom.c | 92 ++++---
ext/dom/php_dom.h | 4 +-
ext/dom/php_dom.stub.php | 106 ++++++--
ext/dom/php_dom_arginfo.h | 227 ++++++++++++++----
ext/dom/tests/DOMDocument_load_error6.phpt | 2 +-
.../DOMDocument_relaxNGValidate_error2.phpt | 4 +-
.../DOMDocument_schemaValidate_error1.phpt | 4 +-
.../DOMDocument_schemaValidate_error5.phpt | 4 +-
.../Document_encoding_unicode_error.phpt | 27 ---
.../Document_load_different_encoding.phpt | 19 --
.../HTML5/interactions/Document_clone.phpt | 31 ---
.../Document_registerNodeClass_01.phpt | 28 ---
.../Document_registerNodeClass_03.phpt | 26 --
.../Document_loadHTMLFile_empty_path.phpt | 19 --
...ment_loadHTMLFile_local_existing_file.phpt | 24 --
...oadHTMLFile_local_file_does_not_exist.phpt | 15 --
...oadHTMLFile_nul_terminator_cases_path.phpt | 21 --
.../Document_loadHTML_parser_warning_01.phpt | 24 --
.../Document_loadHTML_parser_warning_02.phpt | 33 ---
.../HTML5/parser/Document_load_options.phpt | 109 ---------
ext/dom/tests/domobject_debug_handler.phpt | 8 +-
.../html/encoding/HTMLDocument_GB18030.phpt} | 5 +-
.../encoding/HTMLDocument_Shift_JIS.phpt} | 5 +-
.../encoding/HTMLDocument_UTF16BE_BOM.phpt} | 5 +-
.../encoding/HTMLDocument_UTF16LE_BOM.phpt} | 5 +-
.../html/encoding/HTMLDocument_UTF8_BOM.phpt} | 5 +-
.../encoding/HTMLDocument_Windows1251.phpt} | 5 +-
.../HTMLDocument_encoding_edge_case_01.phpt} | 4 +-
.../HTMLDocument_encoding_edge_case_02.phpt} | 5 +-
.../HTMLDocument_encoding_edge_case_03.phpt} | 5 +-
.../HTMLDocument_encoding_edge_case_04.phpt} | 4 +-
.../HTMLDocument_encoding_edge_case_05.phpt} | 7 +-
.../HTMLDocument_encoding_edge_case_06.phpt} | 4 +-
.../HTMLDocument_encoding_edge_case_07.phpt} | 7 +-
.../HTMLDocument_encoding_field_test.phpt} | 13 +-
.../HTMLDocument_encoding_unicode_error.phpt | 26 ++
.../HTMLDocument_fallback_encoding.phpt} | 5 +-
.../html}/encoding/fallback_encoding.html | 0
.../html}/encoding/gb18030.html | 0
.../html}/encoding/shift_jis.html | 0
.../html}/encoding/utf16be_bom.html | Bin
.../html}/encoding/utf16le_bom.html | Bin
.../html}/encoding/utf16le_error.html | Bin
.../html}/encoding/utf8_bom.html | 0
.../html}/encoding/windows1251.html | 0
.../HTMLDocument_adopt_DOMDocument.phpt} | 4 +-
.../html/interactions/HTMLDocument_clone.phpt | 23 ++
.../HTMLDocument_registerNodeClass_01.phpt} | 8 +-
.../HTMLDocument_registerNodeClass_02.phpt | 36 +++
...hould_retain_properties_and_owner_01.phpt} | 35 +--
...hould_retain_properties_and_owner_02.phpt} | 44 +---
...ment_fromFile_DOM_HTML_NO_DEFAULT_NS.phpt} | 8 +-
.../HTMLDocument_fromFile_empty_path.phpt | 16 ++
...Document_fromFile_local_existing_file.phpt | 22 ++
...nt_fromFile_local_file_does_not_exist.phpt | 19 ++
...nt_fromFile_nul_terminator_cases_path.phpt | 22 ++
...LDocument_fromFile_parser_warning_01.phpt} | 5 +-
...LDocument_fromFile_parser_warning_02.phpt} | 5 +-
...LDocument_fromFile_parser_warning_03.phpt} | 5 +-
...fromFile_with_failing_stream_wrapper.phpt} | 13 +-
...fromFile_with_working_stream_wrapper.phpt} | 10 +-
...nt_fromString_DOM_HTML_NO_DEFAULT_NS.phpt} | 8 +-
...MLDocument_fromString_LIBXML_COMPACT.phpt} | 5 +-
...ring_LIBXML_HTML_NOIMPLIED_namespace.phpt} | 5 +-
.../HTMLDocument_fromString_empty.phpt} | 6 +-
...tring_fromFile_LIBXML_HTML_NOIMPLIED.phpt} | 13 +-
.../HTMLDocument_fromString_line_column.phpt} | 6 +-
...LDocument_fromString_normal_no_error.phpt} | 6 +-
.../HTMLDocument_fromString_old_dtd.phpt} | 7 +-
...Document_fromString_parser_warning_01.phpt | 23 ++
...Document_fromString_parser_warning_02.phpt | 32 +++
...ocument_fromString_parser_warning_03.phpt} | 5 +-
...String_parser_warning_internal_error.phpt} | 6 +-
...HTMLDocument_fromString_without_body.phpt} | 7 +-
.../parser/HTMLDocument_parse_options.phpt | 107 +++++++++
.../html}/parser/paragraph.html | 0
.../html}/parser/parser_warning_01.html | 0
.../html}/parser/parser_warning_02.html | Bin
.../html}/parser/parser_warning_03.html | 0
.../html}/parser/predefined_namespaces.phpt | 5 +-
.../HTMLDocument_escape_attribute.phpt} | 5 +-
.../serializer/HTMLDocument_escape_nbsp.phpt} | 5 +-
.../HTMLDocument_serialize_attribute_ns.phpt} | 4 +-
.../HTMLDocument_serialize_cdata.phpt} | 4 +-
.../HTMLDocument_serialize_comment.phpt} | 4 +-
.../HTMLDocument_serialize_doctype.phpt} | 5 +-
.../HTMLDocument_serialize_element_ns.phpt} | 4 +-
...TMLDocument_serialize_failing_stream.phpt} | 6 +-
.../HTMLDocument_serialize_fragment.phpt} | 4 +-
...HTMLDocument_serialize_full_document.phpt} | 5 +-
...TMLDocument_serialize_ns_imported_01.phpt} | 10 +-
...TMLDocument_serialize_ns_imported_02.phpt} | 10 +-
...TMLDocument_serialize_ns_imported_03.phpt} | 10 +-
...TMLDocument_serialize_ns_imported_04.phpt} | 10 +-
...TMLDocument_serialize_ns_imported_05.phpt} | 10 +-
...TMLDocument_serialize_ns_imported_06.phpt} | 10 +-
...ent_serialize_processing_instruction.phpt} | 4 +-
...LDocument_serialize_roots_test_empty.phpt} | 4 +-
.../HTMLDocument_serialize_text_01.phpt} | 5 +-
.../HTMLDocument_serialize_text_02.phpt} | 4 +-
.../HTMLDocument_serialize_text_03.phpt} | 4 +-
...HTMLDocument_serialize_void_elements.phpt} | 4 +-
.../tests/modern/xml/XMLDocument_debug.phpt | 88 +++++++
.../xml/XMLDocument_fromEmptyDocument_01.phpt | 16 ++
.../xml/XMLDocument_fromEmptyDocument_02.phpt | 88 +++++++
.../xml/XMLDocument_fromEmptyDocument_03.phpt | 15 ++
.../modern/xml/XMLDocument_fromFile_01.phpt | 16 ++
.../modern/xml/XMLDocument_fromFile_02.phpt | 18 ++
.../modern/xml/XMLDocument_fromFile_03.phpt | 16 ++
.../modern/xml/XMLDocument_fromFile_04.phpt | 23 ++
.../modern/xml/XMLDocument_fromString_01.phpt | 16 ++
.../modern/xml/XMLDocument_fromString_02.phpt | 15 ++
.../modern/xml/XMLDocument_fromString_03.phpt | 24 ++
...LDocument_node_ownerDocument_for_XML.phpt} | 13 +-
.../modern/xml/XMLDocument_saveXML_node.phpt | 16 ++
ext/dom/xml_document.c | 194 +++++++++++++++
ext/dom/xpath.c | 2 +-
ext/libxml/libxml.c | 2 +-
ext/libxml/php_libxml.h | 2 +-
125 files changed, 1469 insertions(+), 920 deletions(-)
rename ext/dom/{html5_document.c => html_document.c} (92%)
delete mode 100644 ext/dom/tests/HTML5/encoding/Document_encoding_unicode_error.phpt
delete mode 100644 ext/dom/tests/HTML5/encoding/Document_load_different_encoding.phpt
delete mode 100644 ext/dom/tests/HTML5/interactions/Document_clone.phpt
delete mode 100644 ext/dom/tests/HTML5/interactions/Document_registerNodeClass_01.phpt
delete mode 100644 ext/dom/tests/HTML5/interactions/Document_registerNodeClass_03.phpt
delete mode 100644 ext/dom/tests/HTML5/parser/Document_loadHTMLFile_empty_path.phpt
delete mode 100644 ext/dom/tests/HTML5/parser/Document_loadHTMLFile_local_existing_file.phpt
delete mode 100644 ext/dom/tests/HTML5/parser/Document_loadHTMLFile_local_file_does_not_exist.phpt
delete mode 100644 ext/dom/tests/HTML5/parser/Document_loadHTMLFile_nul_terminator_cases_path.phpt
delete mode 100644 ext/dom/tests/HTML5/parser/Document_loadHTML_parser_warning_01.phpt
delete mode 100644 ext/dom/tests/HTML5/parser/Document_loadHTML_parser_warning_02.phpt
delete mode 100644 ext/dom/tests/HTML5/parser/Document_load_options.phpt
rename ext/dom/tests/{HTML5/encoding/Document_GB18030.phpt => modern/html/encoding/HTMLDocument_GB18030.phpt} (86%)
rename ext/dom/tests/{HTML5/encoding/Document_Shift_JIS.phpt => modern/html/encoding/HTMLDocument_Shift_JIS.phpt} (86%)
rename ext/dom/tests/{HTML5/encoding/Document_UTF16BE_BOM.phpt => modern/html/encoding/HTMLDocument_UTF16BE_BOM.phpt} (89%)
rename ext/dom/tests/{HTML5/encoding/Document_UTF16LE_BOM.phpt => modern/html/encoding/HTMLDocument_UTF16LE_BOM.phpt} (89%)
rename ext/dom/tests/{HTML5/encoding/Document_UTF8_BOM.phpt => modern/html/encoding/HTMLDocument_UTF8_BOM.phpt} (86%)
rename ext/dom/tests/{HTML5/encoding/Document_Windows1251.phpt => modern/html/encoding/HTMLDocument_Windows1251.phpt} (87%)
rename ext/dom/tests/{HTML5/encoding/Document_encoding_edge_case_01.phpt => modern/html/encoding/HTMLDocument_encoding_edge_case_01.phpt} (98%)
rename ext/dom/tests/{HTML5/encoding/Document_encoding_edge_case_02.phpt => modern/html/encoding/HTMLDocument_encoding_edge_case_02.phpt} (97%)
rename ext/dom/tests/{HTML5/encoding/Document_encoding_edge_case_03.phpt => modern/html/encoding/HTMLDocument_encoding_edge_case_03.phpt} (97%)
rename ext/dom/tests/{HTML5/encoding/Document_encoding_edge_case_04.phpt => modern/html/encoding/HTMLDocument_encoding_edge_case_04.phpt} (98%)
rename ext/dom/tests/{HTML5/encoding/Document_encoding_edge_case_05.phpt => modern/html/encoding/HTMLDocument_encoding_edge_case_05.phpt} (96%)
rename ext/dom/tests/{HTML5/encoding/Document_encoding_edge_case_06.phpt => modern/html/encoding/HTMLDocument_encoding_edge_case_06.phpt} (98%)
rename ext/dom/tests/{HTML5/encoding/Document_encoding_edge_case_07.phpt => modern/html/encoding/HTMLDocument_encoding_edge_case_07.phpt} (96%)
rename ext/dom/tests/{HTML5/encoding/Document_encoding_field_test.phpt => modern/html/encoding/HTMLDocument_encoding_field_test.phpt} (65%)
create mode 100644 ext/dom/tests/modern/html/encoding/HTMLDocument_encoding_unicode_error.phpt
rename ext/dom/tests/{HTML5/encoding/Document_fallback_encoding.phpt => modern/html/encoding/HTMLDocument_fallback_encoding.phpt} (68%)
rename ext/dom/tests/{HTML5 => modern/html}/encoding/fallback_encoding.html (100%)
rename ext/dom/tests/{HTML5 => modern/html}/encoding/gb18030.html (100%)
rename ext/dom/tests/{HTML5 => modern/html}/encoding/shift_jis.html (100%)
rename ext/dom/tests/{HTML5 => modern/html}/encoding/utf16be_bom.html (100%)
rename ext/dom/tests/{HTML5 => modern/html}/encoding/utf16le_bom.html (100%)
rename ext/dom/tests/{HTML5 => modern/html}/encoding/utf16le_error.html (100%)
rename ext/dom/tests/{HTML5 => modern/html}/encoding/utf8_bom.html (100%)
rename ext/dom/tests/{HTML5 => modern/html}/encoding/windows1251.html (100%)
rename ext/dom/tests/{HTML5/interactions/Document_adopt_DOMDocument.phpt => modern/html/interactions/HTMLDocument_adopt_DOMDocument.phpt} (82%)
create mode 100644 ext/dom/tests/modern/html/interactions/HTMLDocument_clone.phpt
rename ext/dom/tests/{HTML5/interactions/Document_registerNodeClass_02.phpt => modern/html/interactions/HTMLDocument_registerNodeClass_01.phpt} (56%)
create mode 100644 ext/dom/tests/modern/html/interactions/HTMLDocument_registerNodeClass_02.phpt
rename ext/dom/tests/{HTML5/interactions/Document_should_retain_properties_and_owner_01.phpt => modern/html/interactions/HTMLDocument_should_retain_properties_and_owner_01.phpt} (66%)
rename ext/dom/tests/{HTML5/interactions/Document_should_retain_properties_and_owner_02.phpt => modern/html/interactions/HTMLDocument_should_retain_properties_and_owner_02.phpt} (65%)
rename ext/dom/tests/{HTML5/parser/Document_loadHTMLFile_DOM_HTML_NO_DEFAULT_NS copy.phpt => modern/html/parser/HTMLDocument_fromFile_DOM_HTML_NO_DEFAULT_NS.phpt} (64%)
create mode 100644 ext/dom/tests/modern/html/parser/HTMLDocument_fromFile_empty_path.phpt
create mode 100644 ext/dom/tests/modern/html/parser/HTMLDocument_fromFile_local_existing_file.phpt
create mode 100644 ext/dom/tests/modern/html/parser/HTMLDocument_fromFile_local_file_does_not_exist.phpt
create mode 100644 ext/dom/tests/modern/html/parser/HTMLDocument_fromFile_nul_terminator_cases_path.phpt
rename ext/dom/tests/{HTML5/parser/Document_loadHTMLFile_parser_warning_01.phpt => modern/html/parser/HTMLDocument_fromFile_parser_warning_01.phpt} (65%)
rename ext/dom/tests/{HTML5/parser/Document_loadHTMLFile_parser_warning_02.phpt => modern/html/parser/HTMLDocument_fromFile_parser_warning_02.phpt} (65%)
rename ext/dom/tests/{HTML5/parser/Document_loadHTMLFile_parser_warning_03.phpt => modern/html/parser/HTMLDocument_fromFile_parser_warning_03.phpt} (51%)
rename ext/dom/tests/{HTML5/parser/Document_loadHTMLFile_with_failing_stream_wrapper.phpt => modern/html/parser/HTMLDocument_fromFile_with_failing_stream_wrapper.phpt} (73%)
rename ext/dom/tests/{HTML5/parser/Document_loadHTMLFile_with_working_stream_wrapper.phpt => modern/html/parser/HTMLDocument_fromFile_with_working_stream_wrapper.phpt} (76%)
rename ext/dom/tests/{HTML5/parser/Document_loadHTML_DOM_HTML_NO_DEFAULT_NS.phpt => modern/html/parser/HTMLDocument_fromString_DOM_HTML_NO_DEFAULT_NS.phpt} (62%)
rename ext/dom/tests/{HTML5/parser/Document_loadHTML_LIBXML_COMPACT.phpt => modern/html/parser/HTMLDocument_fromString_LIBXML_COMPACT.phpt} (81%)
rename ext/dom/tests/{HTML5/parser/Document_loadHTML_LIBXML_HTML_NOIMPLIED_namespace.phpt => modern/html/parser/HTMLDocument_fromString_LIBXML_HTML_NOIMPLIED_namespace.phpt} (60%)
rename ext/dom/tests/{HTML5/parser/Document_loadHTML_empty.phpt => modern/html/parser/HTMLDocument_fromString_empty.phpt} (55%)
rename ext/dom/tests/{HTML5/parser/Document_loadHTML_LIBXML_HTML_NOIMPLIED.phpt => modern/html/parser/HTMLDocument_fromString_fromFile_LIBXML_HTML_NOIMPLIED.phpt} (83%)
rename ext/dom/tests/{HTML5/parser/Document_loadHTML_line_column.phpt => modern/html/parser/HTMLDocument_fromString_line_column.phpt} (91%)
rename ext/dom/tests/{HTML5/parser/Document_loadHTML_normal_no_error.phpt => modern/html/parser/HTMLDocument_fromString_normal_no_error.phpt} (86%)
rename ext/dom/tests/{HTML5/parser/Document_loadHTML_old_dtd.phpt => modern/html/parser/HTMLDocument_fromString_old_dtd.phpt} (73%)
create mode 100644 ext/dom/tests/modern/html/parser/HTMLDocument_fromString_parser_warning_01.phpt
create mode 100644 ext/dom/tests/modern/html/parser/HTMLDocument_fromString_parser_warning_02.phpt
rename ext/dom/tests/{HTML5/parser/Document_loadHTML_parser_warning_03.phpt => modern/html/parser/HTMLDocument_fromString_parser_warning_03.phpt} (64%)
rename ext/dom/tests/{HTML5/parser/Document_loadHTML_parser_warning_internal_error.phpt => modern/html/parser/HTMLDocument_fromString_parser_warning_internal_error.phpt} (83%)
rename ext/dom/tests/{HTML5/parser/Document_loadHTML_without_body.phpt => modern/html/parser/HTMLDocument_fromString_without_body.phpt} (50%)
create mode 100644 ext/dom/tests/modern/html/parser/HTMLDocument_parse_options.phpt
rename ext/dom/tests/{HTML5 => modern/html}/parser/paragraph.html (100%)
rename ext/dom/tests/{HTML5 => modern/html}/parser/parser_warning_01.html (100%)
rename ext/dom/tests/{HTML5 => modern/html}/parser/parser_warning_02.html (100%)
rename ext/dom/tests/{HTML5 => modern/html}/parser/parser_warning_03.html (100%)
rename ext/dom/tests/{HTML5 => modern/html}/parser/predefined_namespaces.phpt (96%)
rename ext/dom/tests/{HTML5/serializer/Document_escape_attribute.phpt => modern/html/serializer/HTMLDocument_escape_attribute.phpt} (67%)
rename ext/dom/tests/{HTML5/serializer/Document_escape_nbsp.phpt => modern/html/serializer/HTMLDocument_escape_nbsp.phpt} (50%)
rename ext/dom/tests/{HTML5/serializer/Document_serialize_attribute_ns.phpt => modern/html/serializer/HTMLDocument_serialize_attribute_ns.phpt} (88%)
rename ext/dom/tests/{HTML5/serializer/Document_serialize_cdata.phpt => modern/html/serializer/HTMLDocument_serialize_cdata.phpt} (67%)
rename ext/dom/tests/{HTML5/serializer/Document_serialize_comment.phpt => modern/html/serializer/HTMLDocument_serialize_comment.phpt} (66%)
rename ext/dom/tests/{HTML5/serializer/Document_serialize_doctype.phpt => modern/html/serializer/HTMLDocument_serialize_doctype.phpt} (88%)
rename ext/dom/tests/{HTML5/serializer/Document_serialize_element_ns.phpt => modern/html/serializer/HTMLDocument_serialize_element_ns.phpt} (88%)
rename ext/dom/tests/{HTML5/serializer/Document_serialize_failing_stream.phpt => modern/html/serializer/HTMLDocument_serialize_failing_stream.phpt} (85%)
rename ext/dom/tests/{HTML5/serializer/Document_serialize_fragment.phpt => modern/html/serializer/HTMLDocument_serialize_fragment.phpt} (81%)
rename ext/dom/tests/{HTML5/serializer/Document_serialize_full_document.phpt => modern/html/serializer/HTMLDocument_serialize_full_document.phpt} (90%)
rename ext/dom/tests/{HTML5/serializer/Document_serialize_ns_imported_01.phpt => modern/html/serializer/HTMLDocument_serialize_ns_imported_01.phpt} (73%)
rename ext/dom/tests/{HTML5/serializer/Document_serialize_ns_imported_02.phpt => modern/html/serializer/HTMLDocument_serialize_ns_imported_02.phpt} (69%)
rename ext/dom/tests/{HTML5/serializer/Document_serialize_ns_imported_04.phpt => modern/html/serializer/HTMLDocument_serialize_ns_imported_03.phpt} (66%)
rename ext/dom/tests/{HTML5/serializer/Document_serialize_ns_imported_03.phpt => modern/html/serializer/HTMLDocument_serialize_ns_imported_04.phpt} (66%)
rename ext/dom/tests/{HTML5/serializer/Document_serialize_ns_imported_05.phpt => modern/html/serializer/HTMLDocument_serialize_ns_imported_05.phpt} (69%)
rename ext/dom/tests/{HTML5/serializer/Document_serialize_ns_imported_06.phpt => modern/html/serializer/HTMLDocument_serialize_ns_imported_06.phpt} (66%)
rename ext/dom/tests/{HTML5/serializer/Document_serialize_processing_instruction.phpt => modern/html/serializer/HTMLDocument_serialize_processing_instruction.phpt} (74%)
rename ext/dom/tests/{HTML5/serializer/Document_serialize_roots_test_empty.phpt => modern/html/serializer/HTMLDocument_serialize_roots_test_empty.phpt} (85%)
rename ext/dom/tests/{HTML5/serializer/Document_serialize_text_01.phpt => modern/html/serializer/HTMLDocument_serialize_text_01.phpt} (69%)
rename ext/dom/tests/{HTML5/serializer/Document_serialize_text_02.phpt => modern/html/serializer/HTMLDocument_serialize_text_02.phpt} (88%)
rename ext/dom/tests/{HTML5/serializer/Document_serialize_text_03.phpt => modern/html/serializer/HTMLDocument_serialize_text_03.phpt} (90%)
rename ext/dom/tests/{HTML5/serializer/Document_serialize_void_elements.phpt => modern/html/serializer/HTMLDocument_serialize_void_elements.phpt} (95%)
create mode 100644 ext/dom/tests/modern/xml/XMLDocument_debug.phpt
create mode 100644 ext/dom/tests/modern/xml/XMLDocument_fromEmptyDocument_01.phpt
create mode 100644 ext/dom/tests/modern/xml/XMLDocument_fromEmptyDocument_02.phpt
create mode 100644 ext/dom/tests/modern/xml/XMLDocument_fromEmptyDocument_03.phpt
create mode 100644 ext/dom/tests/modern/xml/XMLDocument_fromFile_01.phpt
create mode 100644 ext/dom/tests/modern/xml/XMLDocument_fromFile_02.phpt
create mode 100644 ext/dom/tests/modern/xml/XMLDocument_fromFile_03.phpt
create mode 100644 ext/dom/tests/modern/xml/XMLDocument_fromFile_04.phpt
create mode 100644 ext/dom/tests/modern/xml/XMLDocument_fromString_01.phpt
create mode 100644 ext/dom/tests/modern/xml/XMLDocument_fromString_02.phpt
create mode 100644 ext/dom/tests/modern/xml/XMLDocument_fromString_03.phpt
rename ext/dom/tests/{HTML5/interactions/Document_node_ownerDocument_for_XML.phpt => modern/xml/XMLDocument_node_ownerDocument_for_XML.phpt} (82%)
create mode 100644 ext/dom/tests/modern/xml/XMLDocument_saveXML_node.phpt
create mode 100644 ext/dom/xml_document.c
diff --git a/ext/dom/config.m4 b/ext/dom/config.m4
index c43bb35f100b5..384ea6d5bc9f4 100644
--- a/ext/dom/config.m4
+++ b/ext/dom/config.m4
@@ -26,7 +26,7 @@ if test "$PHP_DOM" != "no"; then
$LEXBOR_DIR/ns/ns.c \
$LEXBOR_DIR/tag/tag.c"
PHP_NEW_EXTENSION(dom, [php_dom.c attr.c document.c \
- html5_document.c html5_serializer.c html5_parser.c namespace_compat.c \
+ xml_document.c html_document.c html5_serializer.c html5_parser.c namespace_compat.c \
domexception.c parentnode.c \
processinginstruction.c cdatasection.c \
documentfragment.c domimplementation.c \
diff --git a/ext/dom/config.w32 b/ext/dom/config.w32
index b663b64c69a5f..a18e8ebe3a60f 100644
--- a/ext/dom/config.w32
+++ b/ext/dom/config.w32
@@ -8,7 +8,7 @@ if (PHP_DOM == "yes") {
CHECK_HEADER_ADD_INCLUDE("libxml/parser.h", "CFLAGS_DOM", PHP_PHP_BUILD + "\\include\\libxml2")
) {
EXTENSION("dom", "php_dom.c attr.c document.c \
- html5_document.c html5_serializer.c html5_parser.c namespace_compat.c \
+ xml_document.c html_document.c html5_serializer.c html5_parser.c namespace_compat.c \
domexception.c parentnode.c processinginstruction.c \
cdatasection.c documentfragment.c domimplementation.c element.c \
node.c characterdata.c documenttype.c \
diff --git a/ext/dom/document.c b/ext/dom/document.c
index a31d9d58c052b..123598d1baff0 100644
--- a/ext/dom/document.c
+++ b/ext/dom/document.c
@@ -1098,7 +1098,8 @@ PHP_METHOD(DOM_Document, normalizeDocument)
}
/* }}} end dom_document_normalize_document */
-void php_dom_document_constructor(INTERNAL_FUNCTION_PARAMETERS)
+/* {{{ */
+PHP_METHOD(DOMDocument, __construct)
{
xmlDoc *docp = NULL, *olddoc;
dom_object *intern;
@@ -1137,19 +1138,13 @@ void php_dom_document_constructor(INTERNAL_FUNCTION_PARAMETERS)
}
php_libxml_increment_node_ptr((php_libxml_node_object *)intern, (xmlNodePtr)docp, (void *)intern);
}
-
-/* {{{ */
-PHP_METHOD(DOMDocument, __construct)
-{
- php_dom_document_constructor(INTERNAL_FUNCTION_PARAM_PASSTHRU);
-}
/* }}} end DOMDocument::__construct */
-char *_dom_get_valid_file_path(char *source, char *resolved_path, int resolved_path_len ) /* {{{ */
+const char *_dom_get_valid_file_path(const char *source, char *resolved_path, int resolved_path_len ) /* {{{ */
{
xmlURI *uri;
xmlChar *escsource;
- char *file_dest;
+ const char *file_dest;
int isFileUri = 0;
uri = xmlCreateURI();
@@ -1202,7 +1197,7 @@ char *_dom_get_valid_file_path(char *source, char *resolved_path, int resolved_p
}
/* }}} */
-static xmlDocPtr dom_document_parser(zval *id, int mode, char *source, size_t source_len, size_t options) /* {{{ */
+xmlDocPtr dom_document_parser(zval *id, int mode, const char *source, size_t source_len, size_t options) /* {{{ */
{
xmlDocPtr ret;
xmlParserCtxtPtr ctxt = NULL;
@@ -1211,10 +1206,14 @@ static xmlDocPtr dom_document_parser(zval *id, int mode, char *source, size_t so
int old_error_reporting = 0;
char *directory=NULL, resolved_path[MAXPATHLEN + 1];
- dom_object *intern = Z_DOMOBJ_P(id);
- php_libxml_ref_obj *document = intern->document;
-
- libxml_doc_props const* doc_props = dom_get_doc_props_read_only(document);
+ libxml_doc_props const* doc_props;
+ if (id == NULL) {
+ doc_props = dom_get_doc_props_read_only(NULL);
+ } else {
+ dom_object *intern = Z_DOMOBJ_P(id);
+ php_libxml_ref_obj *document = intern->document;
+ doc_props = dom_get_doc_props_read_only(document);
+ }
validate = doc_props->validateonparse;
resolve_externals = doc_props->resolveexternals;
keep_blanks = doc_props->preservewhitespace;
@@ -1224,12 +1223,11 @@ static xmlDocPtr dom_document_parser(zval *id, int mode, char *source, size_t so
xmlInitParser();
if (mode == DOM_LOAD_FILE) {
- char *file_dest;
if (CHECK_NULL_PATH(source, source_len)) {
- zend_value_error("Path to document must not contain any null bytes");
+ zend_argument_value_error(1, "must not contain any null bytes");
return NULL;
}
- file_dest = _dom_get_valid_file_path(source, resolved_path, MAXPATHLEN);
+ const char *file_dest = _dom_get_valid_file_path(source, resolved_path, MAXPATHLEN);
if (file_dest) {
ctxt = xmlCreateFileParserCtxt(file_dest);
}
@@ -1315,7 +1313,7 @@ static xmlDocPtr dom_document_parser(zval *id, int mode, char *source, size_t so
}
/* }}} */
-void php_dom_finish_loading_document(zval *this, zval *return_value, xmlDocPtr newdoc)
+static void php_dom_finish_loading_document(zval *this, zval *return_value, xmlDocPtr newdoc)
{
if (!newdoc)
RETURN_FALSE;
@@ -1323,7 +1321,7 @@ void php_dom_finish_loading_document(zval *this, zval *return_value, xmlDocPtr n
dom_object *intern = Z_DOMOBJ_P(this);
size_t old_modification_nr = 0;
if (intern != NULL) {
- bool is_html5_class = intern->document->is_html5_class;
+ bool is_modern_api_class = intern->document->is_modern_api_class;
xmlDocPtr docp = (xmlDocPtr) dom_object_get_node(intern);
dom_doc_propsptr doc_prop = NULL;
if (docp != NULL) {
@@ -1343,7 +1341,7 @@ void php_dom_finish_loading_document(zval *this, zval *return_value, xmlDocPtr n
RETURN_FALSE;
}
intern->document->doc_props = doc_prop;
- intern->document->is_html5_class = is_html5_class;
+ intern->document->is_modern_api_class = is_modern_api_class;
}
php_libxml_increment_node_ptr((php_libxml_node_object *)intern, (xmlNodePtr)newdoc, (void *)intern);
@@ -1356,7 +1354,8 @@ void php_dom_finish_loading_document(zval *this, zval *return_value, xmlDocPtr n
RETURN_TRUE;
}
-void dom_parse_document(INTERNAL_FUNCTION_PARAMETERS, int mode, xmlDocPtr *doc_out) {
+static void dom_parse_document(INTERNAL_FUNCTION_PARAMETERS, int mode)
+{
char *source;
size_t source_len;
zend_long options = 0;
@@ -1379,8 +1378,6 @@ void dom_parse_document(INTERNAL_FUNCTION_PARAMETERS, int mode, xmlDocPtr *doc_o
}
xmlDocPtr newdoc = dom_document_parser(ZEND_THIS, mode, source, source_len, options);
- *doc_out = newdoc;
-
php_dom_finish_loading_document(ZEND_THIS, return_value, newdoc);
}
@@ -1389,8 +1386,7 @@ Since: DOM Level 3
*/
PHP_METHOD(DOMDocument, load)
{
- xmlDocPtr unused;
- dom_parse_document(INTERNAL_FUNCTION_PARAM_PASSTHRU, DOM_LOAD_FILE, &unused);
+ dom_parse_document(INTERNAL_FUNCTION_PARAM_PASSTHRU, DOM_LOAD_FILE);
}
/* }}} end dom_document_load */
@@ -1399,8 +1395,7 @@ Since: DOM Level 3
*/
PHP_METHOD(DOMDocument, loadXML)
{
- xmlDocPtr unused;
- dom_parse_document(INTERNAL_FUNCTION_PARAM_PASSTHRU, DOM_LOAD_STRING, &unused);
+ dom_parse_document(INTERNAL_FUNCTION_PARAM_PASSTHRU, DOM_LOAD_STRING);
}
/* }}} end dom_document_loadxml */
@@ -1663,7 +1658,8 @@ static void _dom_document_schema_validate(INTERNAL_FUNCTION_PARAMETERS, int type
zval *id;
xmlDoc *docp;
dom_object *intern;
- char *source = NULL, *valid_file = NULL;
+ char *source = NULL;
+ const char *valid_file = NULL;
size_t source_len = 0;
int valid_opts = 0;
zend_long flags = 0;
@@ -1773,7 +1769,8 @@ static void _dom_document_relaxNG_validate(INTERNAL_FUNCTION_PARAMETERS, int typ
zval *id;
xmlDoc *docp;
dom_object *intern;
- char *source = NULL, *valid_file = NULL;
+ char *source = NULL;
+ const char *valid_file = NULL;
size_t source_len = 0;
xmlRelaxNGParserCtxtPtr parser;
xmlRelaxNGPtr sptr;
@@ -2073,6 +2070,11 @@ PHP_METHOD(DOM_Document, registerNodeClass)
RETURN_THROWS();
}
+ if (basece->ce_flags & ZEND_ACC_ABSTRACT) {
+ zend_argument_value_error(1, "must be a non-abstract class");
+ RETURN_THROWS();
+ }
+
if (ce == NULL || instanceof_function(ce, basece)) {
if (UNEXPECTED(ce != NULL && (ce->ce_flags & ZEND_ACC_ABSTRACT))) {
zend_argument_value_error(2, "must not be an abstract class");
diff --git a/ext/dom/dom_ce.h b/ext/dom/dom_ce.h
index a489b059abcad..5b661b2abbb94 100644
--- a/ext/dom/dom_ce.h
+++ b/ext/dom/dom_ce.h
@@ -23,7 +23,8 @@ extern PHP_DOM_EXPORT zend_class_entry *dom_domexception_class_entry;
extern PHP_DOM_EXPORT zend_class_entry *dom_domimplementation_class_entry;
extern PHP_DOM_EXPORT zend_class_entry *dom_documentfragment_class_entry;
extern PHP_DOM_EXPORT zend_class_entry *dom_document_class_entry;
-extern PHP_DOM_EXPORT zend_class_entry *dom_html5_document_class_entry;
+extern PHP_DOM_EXPORT zend_class_entry *dom_html_document_class_entry;
+extern PHP_DOM_EXPORT zend_class_entry *dom_xml_document_class_entry;
extern PHP_DOM_EXPORT zend_class_entry *dom_nodelist_class_entry;
extern PHP_DOM_EXPORT zend_class_entry *dom_namednodemap_class_entry;
extern PHP_DOM_EXPORT zend_class_entry *dom_characterdata_class_entry;
diff --git a/ext/dom/dom_properties.h b/ext/dom/dom_properties.h
index a5a144734e45a..349b604dddcc7 100644
--- a/ext/dom/dom_properties.h
+++ b/ext/dom/dom_properties.h
@@ -62,7 +62,7 @@ zend_result dom_document_substitue_entities_read(dom_object *obj, zval *retval);
zend_result dom_document_substitue_entities_write(dom_object *obj, zval *newval);
/* html5 document properties */
-zend_result dom_html5_document_encoding_write(dom_object *obj, zval *retval);
+zend_result dom_html_document_encoding_write(dom_object *obj, zval *retval);
/* documenttype properties */
zend_result dom_documenttype_name_read(dom_object *obj, zval *retval);
diff --git a/ext/dom/html5_document.c b/ext/dom/html_document.c
similarity index 92%
rename from ext/dom/html5_document.c
rename to ext/dom/html_document.c
index 9fb4d4411d39f..4766d8884efaa 100644
--- a/ext/dom/html5_document.c
+++ b/ext/dom/html_document.c
@@ -494,7 +494,46 @@ static bool check_options_validity(zend_long options)
return true;
}
-PHP_METHOD(DOM_HTML5Document, loadHTML)
+PHP_METHOD(DOM_HTMLDocument, createEmpty) /* {{{ Builds an empty HTML document object in return_value; the optional argument names the document encoding. */
+{
+ const char *encoding = "UTF-8"; /* default used when the optional argument is omitted */
+ size_t encoding_len = strlen("UTF-8");
+ if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s", &encoding, &encoding_len) == FAILURE) { /* "|s": one optional string argument */
+ RETURN_THROWS();
+ }
+
+ const lxb_encoding_data_t *encoding_data = lxb_encoding_data_by_name((const lxb_char_t *) encoding, encoding_len); /* only used to validate the name; the result is not otherwise read in this function */
+
+ if (encoding_data == NULL) { /* no encoding data found for that name: report a value error on argument 1 */
+ zend_argument_value_error(1, "is not a valid document encoding");
+ RETURN_THROWS();
+ }
+
+#ifdef LIBXML_HTML_ENABLED /* use libxml2's HTML document constructor when it is compiled in */
+ xmlDocPtr lxml_doc = htmlNewDocNoDtD(NULL, NULL); /* NULL, NULL: no URI and no external ID passed */
+ if (UNEXPECTED(lxml_doc == NULL)) {
+ goto oom;
+ }
+#else /* otherwise create a plain XML document and retag it as an HTML document node below */
+ xmlDocPtr lxml_doc = xmlNewDoc((const xmlChar *) "1.0");
+ if (UNEXPECTED(lxml_doc == NULL)) {
+ goto oom;
+ }
+ lxml_doc->type = XML_HTML_DOCUMENT_NODE;
+#endif
+
+ lxml_doc->encoding = xmlStrdup((const xmlChar *) encoding); /* the document stores its own copy of the name; NOTE(review): the xmlStrdup() result is not checked for NULL */
+
+ dom_object *intern = php_dom_instantiate_object_helper(return_value, dom_html_document_class_entry, (xmlNodePtr) lxml_doc, NULL); /* wraps lxml_doc in an object of the HTML document class and places it in return_value */
+ intern->document->is_modern_api_class = true; /* flag the new document as belonging to the modern API classes */
+ return;
+
+oom: /* reached only when creating the libxml document failed */
+ php_dom_throw_error(INVALID_STATE_ERR, 1);
+ RETURN_THROWS();
+}
+
+PHP_METHOD(DOM_HTMLDocument, createFromString)
{
const char *source;
size_t source_len;
@@ -588,7 +627,8 @@ PHP_METHOD(DOM_HTML5Document, loadHTML)
lxml_doc->encoding = xmlStrdup((const xmlChar *) "UTF-8");
}
- php_dom_finish_loading_document(ZEND_THIS, return_value, lxml_doc);
+ dom_object *intern = php_dom_instantiate_object_helper(return_value, dom_html_document_class_entry, (xmlNodePtr) lxml_doc, NULL);
+ intern->document->is_modern_api_class = true;
return;
fail_oom:
@@ -597,7 +637,7 @@ PHP_METHOD(DOM_HTML5Document, loadHTML)
RETURN_THROWS();
}
-PHP_METHOD(DOM_HTML5Document, loadHTMLFile)
+PHP_METHOD(DOM_HTMLDocument, createFromFile)
{
const char *filename;
size_t filename_len;
@@ -607,14 +647,14 @@ PHP_METHOD(DOM_HTML5Document, loadHTMLFile)
RETURN_THROWS();
}
- if (!check_options_validity(options)) {
+ /* See php_libxml_streams_IO_open_wrapper(), apparently this caused issues in the past. */
+ if (strstr(filename, "%00")) {
+ zend_argument_value_error(1, "must not contain percent-encoded NUL bytes");
RETURN_THROWS();
}
- /* See php_libxml_streams_IO_open_wrapper(), apparently this caused issues in the past. */
- if (strstr(filename, "%00")) {
- php_error_docref(NULL, E_WARNING, "URI must not contain percent-encoded NUL bytes");
- RETURN_FALSE;
+ if (!check_options_validity(options)) {
+ RETURN_THROWS();
}
dom_lexbor_libxml2_bridge_application_data application_data;
@@ -628,6 +668,16 @@ PHP_METHOD(DOM_HTML5Document, loadHTMLFile)
}
ctx.application_data = &application_data;
+ // TODO: context from LIBXML(stream_context) ???
+ // TODO: https://mimesniff.spec.whatwg.org/#parsing-a-mime-type
+ stream = php_stream_open_wrapper_ex(filename, "rb", REPORT_ERRORS, /* opened_path */ NULL, /* context */ NULL);
+ if (!stream) {
+ if (!EG(exception)) {
+ zend_throw_exception_ex(NULL, 0, "Cannot open file '%s'", filename);
+ }
+ RETURN_THROWS();
+ }
+
lxb_html_document_t *document = lxb_html_document_create();
if (UNEXPECTED(document == NULL)) {
goto fail_oom;
@@ -638,12 +688,6 @@ PHP_METHOD(DOM_HTML5Document, loadHTMLFile)
goto fail_oom;
}
- stream = php_stream_open_wrapper_ex(filename, "rb", REPORT_ERRORS, /* opened_path */ NULL, /* context */ NULL);
- if (!stream) {
- lxb_html_document_destroy(document);
- RETURN_FALSE;
- }
-
/* Setup everything encoding & decoding related */
bool first_read = true;
dom_decoding_encoding_ctx decoding_encoding_ctx;
@@ -708,7 +752,8 @@ PHP_METHOD(DOM_HTML5Document, loadHTMLFile)
lxml_doc->encoding = xmlStrdup((const xmlChar *) "UTF-8");
}
- php_dom_finish_loading_document(ZEND_THIS, return_value, lxml_doc);
+ dom_object *intern = php_dom_instantiate_object_helper(return_value, dom_html_document_class_entry, (xmlNodePtr) lxml_doc, NULL);
+ intern->document->is_modern_api_class = true;
return;
fail_oom:
@@ -720,78 +765,6 @@ PHP_METHOD(DOM_HTML5Document, loadHTMLFile)
RETURN_THROWS();
}
-/* Living spec never creates explicit namespace declaration nodes.
- * They are only written upon serialization but never appear in the tree.
- * So in principle we could just ignore them outright.
- * However, step 10 in https://html.spec.whatwg.org/multipage/parsing.html#create-an-element-for-the-token
- * requires us to have the declaration as an attribute available */
-static void dom_mark_namespaces_as_attributes_too(xmlDocPtr doc)
-{
- if (!doc) {
- return;
- }
-
- xmlNodePtr node = doc->children;
- while (node != NULL) {
- if (node->type == XML_ELEMENT_NODE) {
- dom_ns_compat_mark_attribute_list(node->nsDef);
-
- if (node->children) {
- node = node->children;
- continue;
- }
- }
-
- if (node->next) {
- node = node->next;
- } else {
- /* Go upwards, until we find a parent node with a next sibling, or until we hit the base. */
- do {
- node = node->parent;
- if (node == NULL) {
- return;
- }
- } while (node->next == NULL);
- node = node->next;
- }
- }
-}
-
-void dom_mark_namespaces_for_copy_based_on_copy(xmlNodePtr copy, const xmlNode *original)
-{
- xmlNodePtr copy_current = copy;
- const xmlNode *original_current = original;
- while (copy_current != NULL) {
- ZEND_ASSERT(original_current != NULL);
-
- if (copy_current->type == XML_ELEMENT_NODE) {
- dom_ns_compat_copy_attribute_list_mark(copy_current->nsDef, original_current->nsDef);
-
- if (copy_current->children) {
- copy_current = copy_current->children;
- original_current = original_current->children;
- continue;
- }
- }
-
- if (copy_current->next) {
- copy_current = copy_current->next;
- original_current = original_current->next;
- } else {
- /* Go upwards, until we find a parent node with a next sibling, or until we hit the base. */
- do {
- copy_current = copy_current->parent;
- if (copy_current == NULL) {
- return;
- }
- original_current = original_current->parent;
- } while (copy_current->next == NULL);
- copy_current = copy_current->next;
- original_current = original_current->next;
- }
- }
-}
-
static zend_result dom_write_output_smart_str(void *ctx, const char *buf, size_t size)
{
smart_str_appendl((smart_str *) ctx, buf, size);
@@ -896,7 +869,7 @@ static zend_result dom_common_save(dom_output_ctx *output_ctx, const xmlDoc *doc
return SUCCESS;
}
-PHP_METHOD(DOM_HTML5Document, saveHTMLFile)
+PHP_METHOD(DOM_HTMLDocument, saveHTMLFile)
{
zval *id;
xmlDoc *docp;
@@ -935,7 +908,7 @@ PHP_METHOD(DOM_HTML5Document, saveHTMLFile)
RETURN_LONG(bytes);
}
-PHP_METHOD(DOM_HTML5Document, saveHTML)
+PHP_METHOD(DOM_HTMLDocument, saveHTML)
{
zval *nodep = NULL;
const xmlDoc *docp;
@@ -969,27 +942,12 @@ PHP_METHOD(DOM_HTML5Document, saveHTML)
RETURN_STR(smart_str_extract(&buf));
}
-PHP_METHOD(DOM_HTML5Document, __construct)
-{
- php_dom_document_constructor(INTERNAL_FUNCTION_PARAM_PASSTHRU);
- Z_DOMOBJ_P(ZEND_THIS)->document->is_html5_class = true;
-}
-
-PHP_METHOD(DOM_HTML5Document, load)
-{
- xmlDocPtr doc = NULL;
- dom_parse_document(INTERNAL_FUNCTION_PARAM_PASSTHRU, DOM_LOAD_FILE, &doc);
- dom_mark_namespaces_as_attributes_too(doc);
-}
-
-PHP_METHOD(DOM_HTML5Document, loadXML)
+PHP_METHOD(DOM_HTMLDocument, __construct) /* declared private in the stub, yet still invocable, e.g. through Reflection */
{
-xmlDocPtr doc = NULL;
-dom_parse_document(INTERNAL_FUNCTION_PARAM_PASSTHRU, DOM_LOAD_STRING, &doc);
-dom_mark_namespaces_as_attributes_too(doc);
+ zend_throw_error(NULL, "Cannot directly construct %s, use a factory method instead", ZSTR_VAL(Z_OBJCE_P(ZEND_THIS)->name));
}
-zend_result dom_html5_document_encoding_write(dom_object *obj, zval *newval)
+zend_result dom_html_document_encoding_write(dom_object *obj, zval *newval)
{
xmlDoc *docp = (xmlDocPtr) dom_object_get_node(obj);
if (docp == NULL) {
diff --git a/ext/dom/php_dom.c b/ext/dom/php_dom.c
index 06063ba9d33f7..0701227c6e7ee 100644
--- a/ext/dom/php_dom.c
+++ b/ext/dom/php_dom.c
@@ -42,7 +42,8 @@ PHP_DOM_EXPORT zend_class_entry *dom_childnode_class_entry;
PHP_DOM_EXPORT zend_class_entry *dom_domimplementation_class_entry;
PHP_DOM_EXPORT zend_class_entry *dom_documentfragment_class_entry;
PHP_DOM_EXPORT zend_class_entry *dom_document_class_entry;
-PHP_DOM_EXPORT zend_class_entry *dom_html5_document_class_entry;
+PHP_DOM_EXPORT zend_class_entry *dom_html_document_class_entry;
+PHP_DOM_EXPORT zend_class_entry *dom_xml_document_class_entry;
PHP_DOM_EXPORT zend_class_entry *dom_nodelist_class_entry;
PHP_DOM_EXPORT zend_class_entry *dom_namednodemap_class_entry;
PHP_DOM_EXPORT zend_class_entry *dom_characterdata_class_entry;
@@ -74,7 +75,8 @@ zend_object_handlers dom_xpath_object_handlers;
static HashTable classes;
/* {{{ prop handler tables */
static HashTable dom_document_prop_handlers;
-static HashTable dom_html5_document_prop_handlers;
+static HashTable dom_xml_document_prop_handlers;
+static HashTable dom_html_document_prop_handlers;
static HashTable dom_documentfragment_prop_handlers;
static HashTable dom_node_prop_handlers;
static HashTable dom_nodelist_prop_handlers;
@@ -211,7 +213,7 @@ static void dom_copy_doc_props(php_libxml_ref_obj *source_doc, php_libxml_ref_ob
zend_hash_copy(dest->classmap, source->classmap, NULL);
}
- dest_doc->is_html5_class = source_doc->is_html5_class;
+ dest_doc->is_modern_api_class = source_doc->is_modern_api_class;
}
}
@@ -608,6 +610,22 @@ static void dom_free(void *ptr) {
efree(ptr);
}
+static void register_nondeprecated_xml_props(HashTable *table) /* XML-specific props registered for both DOMDocument and DOM\XMLDocument */
+{
+	dom_register_prop_handler(table, "encoding", sizeof("encoding")-1, dom_document_encoding_read, dom_document_encoding_write);
+	dom_register_prop_handler(table, "xmlEncoding", sizeof("xmlEncoding")-1, dom_document_encoding_read, NULL);
+	dom_register_prop_handler(table, "standalone", sizeof("standalone")-1, dom_document_standalone_read, dom_document_standalone_write);
+	dom_register_prop_handler(table, "xmlStandalone", sizeof("xmlStandalone")-1, dom_document_standalone_read, dom_document_standalone_write);
+	dom_register_prop_handler(table, "version", sizeof("version")-1, dom_document_version_read, dom_document_version_write);
+	dom_register_prop_handler(table, "xmlVersion", sizeof("xmlVersion")-1, dom_document_version_read, dom_document_version_write);
+	dom_register_prop_handler(table, "formatOutput", sizeof("formatOutput")-1, dom_document_format_output_read, dom_document_format_output_write);
+	dom_register_prop_handler(table, "validateOnParse", sizeof("validateOnParse")-1, dom_document_validate_on_parse_read, dom_document_validate_on_parse_write);
+	dom_register_prop_handler(table, "resolveExternals", sizeof("resolveExternals")-1, dom_document_resolve_externals_read, dom_document_resolve_externals_write);
+	dom_register_prop_handler(table, "preserveWhiteSpace", sizeof("preserveWhiteSpace")-1, dom_document_preserve_whitespace_read, dom_document_preserve_whitespace_write);
+	dom_register_prop_handler(table, "recover", sizeof("recover")-1, dom_document_recover_read, dom_document_recover_write);
+	dom_register_prop_handler(table, "substituteEntities", sizeof("substituteEntities")-1, dom_document_substitue_entities_read, dom_document_substitue_entities_write);
+}
+
/* {{{ PHP_MINIT_FUNCTION(dom) */
PHP_MINIT_FUNCTION(dom)
{
@@ -702,7 +720,6 @@ PHP_MINIT_FUNCTION(dom)
HashTable dom_abstract_base_document_prop_handlers;
zend_hash_init(&dom_abstract_base_document_prop_handlers, 0, NULL, dom_dtor_prop_handler, 1);
dom_register_prop_handler(&dom_abstract_base_document_prop_handlers, "doctype", sizeof("doctype")-1, dom_document_doctype_read, NULL);
- dom_register_prop_handler(&dom_abstract_base_document_prop_handlers, "implementation", sizeof("implementation")-1, dom_document_implementation_read, NULL);
dom_register_prop_handler(&dom_abstract_base_document_prop_handlers, "documentElement", sizeof("documentElement")-1, dom_document_document_element_read, NULL);
dom_register_prop_handler(&dom_abstract_base_document_prop_handlers, "strictErrorChecking", sizeof("strictErrorChecking")-1, dom_document_strict_error_checking_read, dom_document_strict_error_checking_write);
dom_register_prop_handler(&dom_abstract_base_document_prop_handlers, "documentURI", sizeof("documentURI")-1, dom_document_document_uri_read, dom_document_document_uri_write);
@@ -715,30 +732,29 @@ PHP_MINIT_FUNCTION(dom)
dom_document_class_entry = register_class_DOMDocument(dom_abstract_base_document_class_entry);
dom_document_class_entry->create_object = dom_objects_new;
zend_hash_init(&dom_document_prop_handlers, 0, NULL, dom_dtor_prop_handler, 1);
+ dom_register_prop_handler(&dom_document_prop_handlers, "implementation", sizeof("implementation")-1, dom_document_implementation_read, NULL);
dom_register_prop_handler(&dom_document_prop_handlers, "actualEncoding", sizeof("actualEncoding")-1, dom_document_encoding_read, NULL);
- dom_register_prop_handler(&dom_document_prop_handlers, "encoding", sizeof("encoding")-1, dom_document_encoding_read, dom_document_encoding_write);
- dom_register_prop_handler(&dom_document_prop_handlers, "xmlEncoding", sizeof("xmlEncoding")-1, dom_document_encoding_read, NULL);
- dom_register_prop_handler(&dom_document_prop_handlers, "standalone", sizeof("standalone")-1, dom_document_standalone_read, dom_document_standalone_write);
- dom_register_prop_handler(&dom_document_prop_handlers, "xmlStandalone", sizeof("xmlStandalone")-1, dom_document_standalone_read, dom_document_standalone_write);
- dom_register_prop_handler(&dom_document_prop_handlers, "version", sizeof("version")-1, dom_document_version_read, dom_document_version_write);
- dom_register_prop_handler(&dom_document_prop_handlers, "xmlVersion", sizeof("xmlVersion")-1, dom_document_version_read, dom_document_version_write);
dom_register_prop_handler(&dom_document_prop_handlers, "config", sizeof("config")-1, dom_document_config_read, NULL);
- dom_register_prop_handler(&dom_document_prop_handlers, "formatOutput", sizeof("formatOutput")-1, dom_document_format_output_read, dom_document_format_output_write);
- dom_register_prop_handler(&dom_document_prop_handlers, "validateOnParse", sizeof("validateOnParse")-1, dom_document_validate_on_parse_read, dom_document_validate_on_parse_write);
- dom_register_prop_handler(&dom_document_prop_handlers, "resolveExternals", sizeof("resolveExternals")-1, dom_document_resolve_externals_read, dom_document_resolve_externals_write);
- dom_register_prop_handler(&dom_document_prop_handlers, "preserveWhiteSpace", sizeof("preserveWhitespace")-1, dom_document_preserve_whitespace_read, dom_document_preserve_whitespace_write);
- dom_register_prop_handler(&dom_document_prop_handlers, "recover", sizeof("recover")-1, dom_document_recover_read, dom_document_recover_write);
- dom_register_prop_handler(&dom_document_prop_handlers, "substituteEntities", sizeof("substituteEntities")-1, dom_document_substitue_entities_read, dom_document_substitue_entities_write);
+ register_nondeprecated_xml_props(&dom_document_prop_handlers);
zend_hash_merge(&dom_document_prop_handlers, &dom_abstract_base_document_prop_handlers, dom_copy_prop_handler, 0);
zend_hash_add_ptr(&classes, dom_document_class_entry->name, &dom_document_prop_handlers);
-dom_html5_document_class_entry = register_class_DOM_HTML5Document(dom_document_class_entry);
+ dom_html_document_class_entry = register_class_DOM_HTMLDocument(dom_abstract_base_document_class_entry);
-dom_document_class_entry->create_object = dom_objects_new;
+ dom_html_document_class_entry->create_object = dom_objects_new;
-zend_hash_init(&dom_html5_document_prop_handlers, 0, NULL, dom_dtor_prop_handler, 1);
-dom_register_prop_handler(&dom_html5_document_prop_handlers, "encoding", sizeof("encoding")-1, dom_document_encoding_read, dom_html5_document_encoding_write);
-zend_hash_merge(&dom_html5_document_prop_handlers, &dom_document_prop_handlers, dom_copy_prop_handler, 0);
-zend_hash_add_ptr(&classes, dom_html5_document_class_entry->name, &dom_html5_document_prop_handlers);
+ zend_hash_init(&dom_html_document_prop_handlers, 0, NULL, dom_dtor_prop_handler, 1);
+ dom_register_prop_handler(&dom_html_document_prop_handlers, "encoding", sizeof("encoding")-1, dom_document_encoding_read, dom_html_document_encoding_write); /* shared reader, HTML-specific writer */
+
+ zend_hash_merge(&dom_html_document_prop_handlers, &dom_abstract_base_document_prop_handlers, dom_copy_prop_handler, 0);
+ zend_hash_add_ptr(&classes, dom_html_document_class_entry->name, &dom_html_document_prop_handlers);
+
+ dom_xml_document_class_entry = register_class_DOM_XMLDocument(dom_abstract_base_document_class_entry);
+ dom_xml_document_class_entry->create_object = dom_objects_new;
+ zend_hash_init(&dom_xml_document_prop_handlers, 0, NULL, dom_dtor_prop_handler, 1);
+ register_nondeprecated_xml_props(&dom_xml_document_prop_handlers);
+
+ zend_hash_merge(&dom_xml_document_prop_handlers, &dom_abstract_base_document_prop_handlers, dom_copy_prop_handler, 0);
+ zend_hash_add_ptr(&classes, dom_xml_document_class_entry->name, &dom_xml_document_prop_handlers);
zend_hash_destroy(&dom_abstract_base_document_prop_handlers);
@@ -915,7 +931,8 @@ PHP_MINFO_FUNCTION(dom)
PHP_MSHUTDOWN_FUNCTION(dom) /* {{{ */
{
zend_hash_destroy(&dom_document_prop_handlers);
- zend_hash_destroy(&dom_html5_document_prop_handlers);
+ zend_hash_destroy(&dom_html_document_prop_handlers);
+ zend_hash_destroy(&dom_xml_document_prop_handlers);
zend_hash_destroy(&dom_documentfragment_prop_handlers);
zend_hash_destroy(&dom_node_prop_handlers);
zend_hash_destroy(&dom_namespace_node_prop_handlers);
@@ -1212,10 +1229,18 @@ PHP_DOM_EXPORT bool php_dom_create_object(xmlNodePtr obj, zval *return_value, do
switch (obj->type) {
case XML_DOCUMENT_NODE:
+ {
+ if (domobj && domobj->document && domobj->document->is_modern_api_class) { /* document may be NULL; the classmap lookup in this function guards for that too */
+ ce = dom_xml_document_class_entry;
+ } else {
+ ce = dom_document_class_entry;
+ }
+ break;
+ }
case XML_HTML_DOCUMENT_NODE:
{
- if (domobj && domobj->document->is_html5_class) {
- ce = dom_html5_document_class_entry;
+ if (domobj && domobj->document && domobj->document->is_modern_api_class) { /* same NULL guard as the XML document case */
+ ce = dom_html_document_class_entry;
} else {
ce = dom_document_class_entry;
}
@@ -1293,20 +1318,27 @@ PHP_DOM_EXPORT bool php_dom_create_object(xmlNodePtr obj, zval *return_value, do
if (domobj && domobj->document) {
ce = dom_get_doc_classmap(domobj->document, ce);
}
+ php_dom_instantiate_object_helper(return_value, ce, obj, domobj);
+ return 0;
+}
+/* }}} end php_domobject_new */
+
+dom_object *php_dom_instantiate_object_helper(zval *return_value, zend_class_entry *ce, xmlNodePtr obj, dom_object *parent)
+{
object_init_ex(return_value, ce);
- intern = Z_DOMOBJ_P(return_value);
+ dom_object *intern = Z_DOMOBJ_P(return_value);
if (obj->doc != NULL) {
- if (domobj != NULL) {
- intern->document = domobj->document;
+ if (parent != NULL) {
+ intern->document = parent->document;
}
php_libxml_increment_doc_ref((php_libxml_node_object *)intern, obj->doc);
}
php_libxml_increment_node_ptr((php_libxml_node_object *)intern, obj, (void *)intern);
- return 0;
+
+ return intern;
}
-/* }}} end php_domobject_new */
void php_dom_create_implementation(zval *retval) {
object_init_ex(retval, dom_domimplementation_class_entry);
@@ -1875,7 +1907,7 @@ xmlNodePtr dom_clone_node(xmlNodePtr node, xmlDocPtr doc, const dom_object *inte
return NULL;
}
- if (intern->document && intern->document->is_html5_class) {
+ if (intern->document && intern->document->is_modern_api_class) {
dom_mark_namespaces_for_copy_based_on_copy(copy, node);
}
diff --git a/ext/dom/php_dom.h b/ext/dom/php_dom.h
index d212ca0e61c5f..fe9dafaf70018 100644
--- a/ext/dom/php_dom.h
+++ b/ext/dom/php_dom.h
@@ -158,10 +158,12 @@ void php_dom_reconcile_attribute_namespace_after_insertion(xmlAttrPtr attrp);
void php_dom_document_constructor(INTERNAL_FUNCTION_PARAMETERS);
+dom_object *php_dom_instantiate_object_helper(zval *return_value, zend_class_entry *ce, xmlNodePtr obj, dom_object *parent);
+
#define DOM_LOAD_STRING 0
#define DOM_LOAD_FILE 1
-void dom_parse_document(INTERNAL_FUNCTION_PARAMETERS, int mode, xmlDocPtr *doc_out);
+xmlDocPtr dom_document_parser(zval *id, int mode, const char *source, size_t source_len, size_t options);
/* parentnode */
void dom_parent_node_prepend(dom_object *context, zval *nodes, uint32_t nodesc);
diff --git a/ext/dom/php_dom.stub.php b/ext/dom/php_dom.stub.php
index ce7cdb6ce6e6b..71d705ef94b68 100644
--- a/ext/dom/php_dom.stub.php
+++ b/ext/dom/php_dom.stub.php
@@ -347,7 +347,7 @@ class DOMNode
public bool $isConnected;
/** @readonly */
- public ?DOMDocument $ownerDocument;
+ public ?DOM\Document $ownerDocument;
/** @readonly */
public ?string $namespaceURI;
@@ -466,7 +466,6 @@ public function __sleep(): array {}
public function __wakeup(): void {}
}
- /** @alias DOM\Implementation */
class DOMImplementation
{
/** @tentative-return-type */
@@ -722,6 +721,9 @@ public function insertAdjacentText(string $where, string $data): void {}
class DOMDocument extends DOM\Document
{
+ /** @readonly */
+ public DOMImplementation $implementation;
+
/**
* @readonly
* @deprecated
@@ -913,11 +915,11 @@ public function __construct(string $name, string $value = "") {}
class DOMXPath
{
/** @readonly */
- public DOMDocument $document;
+ public DOM\Document $document;
public bool $registerNodeNamespaces;
- public function __construct(DOMDocument $document, bool $registerNodeNS = true) {}
+ public function __construct(DOM\Document $document, bool $registerNodeNS = true) {}
/** @tentative-return-type */
public function evaluate(string $expression, ?DOMNode $contextNode = null, bool $registerNodeNS = true): mixed {}
@@ -1035,9 +1037,6 @@ abstract class Document extends DOMNode implements DOMParentNode
/** @readonly */
public ?DocumentType $doctype;
- /** @readonly */
- public Implementation $implementation;
-
/** @readonly */
public ?Element $documentElement;
@@ -1087,13 +1086,13 @@ public function createTextNode(string $data): Text {}
public function getElementById(string $elementId): ?Element {}
/** @tentative-return-type */
- public function getElementsByTagName(string $qualifiedName): NodeList {}
+ public function getElementsByTagName(string $qualifiedName): \DOMNodeList {}
/** @tentative-return-type */
- public function getElementsByTagNameNS(?string $namespace, string $localName): NodeList {}
+ public function getElementsByTagNameNS(?string $namespace, string $localName): \DOMNodeList {}
/** @return Node|false */
- public function importNode(Node $node, bool $deep = false) {}
+ public function importNode(\DOMNode $node, bool $deep = false) {}
/** @tentative-return-type */
public function normalizeDocument(): void {}
@@ -1101,7 +1100,7 @@ public function normalizeDocument(): void {}
/** @tentative-return-type */
public function registerNodeClass(string $baseClass, ?string $extendedClass): bool {}
- #ifdef LIBXML_SCHEMAS_ENABLED
+#ifdef LIBXML_SCHEMAS_ENABLED
/** @tentative-return-type */
public function schemaValidate(string $filename, int $flags = 0): bool {}
@@ -1113,10 +1112,10 @@ public function relaxNGValidate(string $filename): bool {}
/** @tentative-return-type */
public function relaxNGValidateSource(string $source): bool {}
- #endif
+#endif
/** @tentative-return-type */
- public function adoptNode(Node $node): Node|false {}
+ public function adoptNode(\DOMNode $node): \DOMNode|false {}
/**
* @param Node|string $nodes
@@ -1134,23 +1133,92 @@ public function prepend(...$nodes): void {}
public function replaceChildren(...$nodes): void {}
}
- class HTML5Document extends \DOMDocument
+ final class HTMLDocument extends DOM\Document
{
- public function __construct(string $xmlVersion = "1.0", string $encoding = "") {}
+ private function __construct() {}
- public function load(string $filename, int $options = 0): bool {}
+ public static function createEmpty(string $encoding = "UTF-8"): HTMLDocument {}
- public function loadXML(string $source, int $options = 0): bool {}
+ public static function createFromFile(string $path, int $options = 0): HTMLDocument {}
- public function loadHTML(string $source, int $options = 0): bool {}
+ public static function createFromString(string $source, int $options = 0): HTMLDocument {}
- public function loadHTMLFile(string $filename, int $options = 0): bool {}
+ /** @implementation-alias DOMDocument::saveXML */
+ public function saveXML(?\DOMNode $node = null, int $options = 0): string|false {}
+
+ /** @implementation-alias DOMDocument::save */
+ public function saveXMLFile(string $filename, int $options = 0): int|false {}
public function saveHTML(?\DOMNode $node = null): string|false {}
public function saveHTMLFile(string $filename): int|false {}
}
+ final class XMLDocument extends DOM\Document
+ {
+ /** @implementation-alias DOM\HTMLDocument::__construct */
+ private function __construct() {}
+
+ public static function createEmpty(string $version = "1.0", string $encoding = "UTF-8"): XMLDocument {}
+
+ public static function createFromFile(string $path, int $options = 0): XMLDocument {}
+
+ public static function createFromString(string $source, int $options = 0): XMLDocument {}
+
+ /** @readonly */
+ public ?string $xmlEncoding;
+
+ public bool $standalone;
+
+ public bool $xmlStandalone;
+
+ public ?string $version;
+
+ public ?string $xmlVersion;
+
+ public bool $formatOutput;
+
+ public bool $validateOnParse;
+
+ public bool $resolveExternals;
+
+ public bool $preserveWhiteSpace;
+
+ public bool $recover;
+
+ public bool $substituteEntities;
+
+ /**
+ * @implementation-alias DOMDocument::createEntityReference
+ * @return DOMEntityReference|false
+ */
+ public function createEntityReference(string $name) {}
+
+ /**
+ * @tentative-return-type
+ * @implementation-alias DOMDocument::validate
+ */
+ public function validate(): bool {}
+
+ /**
+ * @tentative-return-type
+ * @implementation-alias DOMDocument::xinclude
+ */
+ public function xinclude(int $options = 0): int|false {}
+
+ /**
+ * @tentative-return-type
+ * @implementation-alias DOMDocument::saveXML
+ */
+ public function saveXML(?\DOMNode $node = null, int $options = 0): string|false {}
+
+ /**
+ * @tentative-return-type
+ * @implementation-alias DOMDocument::save
+ */
+ public function saveXMLFile(string $filename, int $options = 0): int|false {}
+ }
+
/** @implementation-alias dom_import_simplexml */
function import_simplexml(object $node): DOMElement {}
}
diff --git a/ext/dom/php_dom_arginfo.h b/ext/dom/php_dom_arginfo.h
index be7d13724ea31..96e7149837380 100644
--- a/ext/dom/php_dom_arginfo.h
+++ b/ext/dom/php_dom_arginfo.h
@@ -1,5 +1,5 @@
/* This is a generated file, edit the .stub.php file instead.
- * Stub hash: 2ee03c3ce38c5f9e89cc16015df7f8455713f879 */
+ * Stub hash: afcf0dfba2c9d3ae0334f129f5852229bdde8d5f */
ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(arginfo_dom_import_simplexml, 0, 1, DOMElement, 0)
ZEND_ARG_TYPE_INFO(0, node, IS_OBJECT, 0)
@@ -421,7 +421,7 @@ ZEND_END_ARG_INFO()
#if defined(LIBXML_XPATH_ENABLED)
ZEND_BEGIN_ARG_INFO_EX(arginfo_class_DOMXPath___construct, 0, 0, 1)
- ZEND_ARG_OBJ_INFO(0, document, DOMDocument, 0)
+ ZEND_ARG_OBJ_INFO(0, document, DOM\\Document, 0)
ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, registerNodeNS, _IS_BOOL, 0, "true")
ZEND_END_ARG_INFO()
#endif
@@ -493,17 +493,12 @@ ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_OBJ_INFO_EX(arginfo_class_DOM_Document_getE
ZEND_ARG_TYPE_INFO(0, elementId, IS_STRING, 0)
ZEND_END_ARG_INFO()
-ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_OBJ_INFO_EX(arginfo_class_DOM_Document_getElementsByTagName, 0, 1, DOM\\NodeList, 0)
- ZEND_ARG_TYPE_INFO(0, qualifiedName, IS_STRING, 0)
-ZEND_END_ARG_INFO()
+#define arginfo_class_DOM_Document_getElementsByTagName arginfo_class_DOMElement_getElementsByTagName
-ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_OBJ_INFO_EX(arginfo_class_DOM_Document_getElementsByTagNameNS, 0, 2, DOM\\NodeList, 0)
- ZEND_ARG_TYPE_INFO(0, namespace, IS_STRING, 1)
- ZEND_ARG_TYPE_INFO(0, localName, IS_STRING, 0)
-ZEND_END_ARG_INFO()
+#define arginfo_class_DOM_Document_getElementsByTagNameNS arginfo_class_DOMElement_getElementsByTagNameNS
ZEND_BEGIN_ARG_INFO_EX(arginfo_class_DOM_Document_importNode, 0, 0, 1)
- ZEND_ARG_OBJ_INFO(0, node, DOM\\Node, 0)
+ ZEND_ARG_OBJ_INFO(0, node, DOMNode, 0)
ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, deep, _IS_BOOL, 0, "false")
ZEND_END_ARG_INFO()
@@ -540,8 +535,8 @@ ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_TYPE_INFO_EX(arginfo_class_DOM_Document_rel
ZEND_END_ARG_INFO()
#endif
-ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_OBJ_TYPE_MASK_EX(arginfo_class_DOM_Document_adoptNode, 0, 1, DOM\\Node, MAY_BE_FALSE)
- ZEND_ARG_OBJ_INFO(0, node, DOM\\Node, 0)
+ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_OBJ_TYPE_MASK_EX(arginfo_class_DOM_Document_adoptNode, 0, 1, DOMNode, MAY_BE_FALSE)
+ ZEND_ARG_OBJ_INFO(0, node, DOMNode, 0)
ZEND_END_ARG_INFO()
#define arginfo_class_DOM_Document_append arginfo_class_DOMParentNode_append
@@ -550,33 +545,67 @@ ZEND_END_ARG_INFO()
#define arginfo_class_DOM_Document_replaceChildren arginfo_class_DOMParentNode_append
-ZEND_BEGIN_ARG_INFO_EX(arginfo_class_DOM_HTML5Document___construct, 0, 0, 0)
- ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, xmlVersion, IS_STRING, 0, "\"1.0\"")
- ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, encoding, IS_STRING, 0, "\"\"")
+#define arginfo_class_DOM_HTMLDocument___construct arginfo_class_DOMDocumentFragment___construct
+
+ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(arginfo_class_DOM_HTMLDocument_createEmpty, 0, 0, DOM\\HTMLDocument, 0)
+ ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, encoding, IS_STRING, 0, "\"UTF-8\"")
ZEND_END_ARG_INFO()
-ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_class_DOM_HTML5Document_load, 0, 1, _IS_BOOL, 0)
- ZEND_ARG_TYPE_INFO(0, filename, IS_STRING, 0)
+ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(arginfo_class_DOM_HTMLDocument_createFromFile, 0, 1, DOM\\HTMLDocument, 0)
+ ZEND_ARG_TYPE_INFO(0, path, IS_STRING, 0)
ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, options, IS_LONG, 0, "0")
ZEND_END_ARG_INFO()
-ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_class_DOM_HTML5Document_loadXML, 0, 1, _IS_BOOL, 0)
+ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(arginfo_class_DOM_HTMLDocument_createFromString, 0, 1, DOM\\HTMLDocument, 0)
ZEND_ARG_TYPE_INFO(0, source, IS_STRING, 0)
ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, options, IS_LONG, 0, "0")
ZEND_END_ARG_INFO()
-#define arginfo_class_DOM_HTML5Document_loadHTML arginfo_class_DOM_HTML5Document_loadXML
+ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_class_DOM_HTMLDocument_saveXML, 0, 0, MAY_BE_STRING|MAY_BE_FALSE)
+ ZEND_ARG_OBJ_INFO_WITH_DEFAULT_VALUE(0, node, DOMNode, 1, "null")
+ ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, options, IS_LONG, 0, "0")
+ZEND_END_ARG_INFO()
-#define arginfo_class_DOM_HTML5Document_loadHTMLFile arginfo_class_DOM_HTML5Document_load
+ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_class_DOM_HTMLDocument_saveXMLFile, 0, 1, MAY_BE_LONG|MAY_BE_FALSE)
+ ZEND_ARG_TYPE_INFO(0, filename, IS_STRING, 0)
+ ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, options, IS_LONG, 0, "0")
+ZEND_END_ARG_INFO()
-ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_class_DOM_HTML5Document_saveHTML, 0, 0, MAY_BE_STRING|MAY_BE_FALSE)
+ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_class_DOM_HTMLDocument_saveHTML, 0, 0, MAY_BE_STRING|MAY_BE_FALSE)
ZEND_ARG_OBJ_INFO_WITH_DEFAULT_VALUE(0, node, DOMNode, 1, "null")
ZEND_END_ARG_INFO()
-ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_class_DOM_HTML5Document_saveHTMLFile, 0, 1, MAY_BE_LONG|MAY_BE_FALSE)
+ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_class_DOM_HTMLDocument_saveHTMLFile, 0, 1, MAY_BE_LONG|MAY_BE_FALSE)
ZEND_ARG_TYPE_INFO(0, filename, IS_STRING, 0)
ZEND_END_ARG_INFO()
+#define arginfo_class_DOM_XMLDocument___construct arginfo_class_DOMDocumentFragment___construct
+
+ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(arginfo_class_DOM_XMLDocument_createEmpty, 0, 0, DOM\\XMLDocument, 0)
+ ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, version, IS_STRING, 0, "\"1.0\"")
+ ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, encoding, IS_STRING, 0, "\"UTF-8\"")
+ZEND_END_ARG_INFO()
+
+ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(arginfo_class_DOM_XMLDocument_createFromFile, 0, 1, DOM\\XMLDocument, 0)
+ ZEND_ARG_TYPE_INFO(0, path, IS_STRING, 0)
+ ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, options, IS_LONG, 0, "0")
+ZEND_END_ARG_INFO()
+
+ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(arginfo_class_DOM_XMLDocument_createFromString, 0, 1, DOM\\XMLDocument, 0)
+ ZEND_ARG_TYPE_INFO(0, source, IS_STRING, 0)
+ ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, options, IS_LONG, 0, "0")
+ZEND_END_ARG_INFO()
+
+#define arginfo_class_DOM_XMLDocument_createEntityReference arginfo_class_DOMDocument_createEntityReference
+
+#define arginfo_class_DOM_XMLDocument_validate arginfo_class_DOMNode_hasAttributes
+
+#define arginfo_class_DOM_XMLDocument_xinclude arginfo_class_DOMDocument_xinclude
+
+#define arginfo_class_DOM_XMLDocument_saveXML arginfo_class_DOMDocument_saveXML
+
+#define arginfo_class_DOM_XMLDocument_saveXMLFile arginfo_class_DOMDocument_save
+
ZEND_FUNCTION(dom_import_simplexml);
ZEND_METHOD(DOMCdataSection, __construct);
@@ -724,13 +753,15 @@ ZEND_METHOD(DOM_Document, relaxNGValidate);
ZEND_METHOD(DOM_Document, relaxNGValidateSource);
#endif
ZEND_METHOD(DOM_Document, adoptNode);
-ZEND_METHOD(DOM_HTML5Document, __construct);
-ZEND_METHOD(DOM_HTML5Document, load);
-ZEND_METHOD(DOM_HTML5Document, loadXML);
-ZEND_METHOD(DOM_HTML5Document, loadHTML);
-ZEND_METHOD(DOM_HTML5Document, loadHTMLFile);
-ZEND_METHOD(DOM_HTML5Document, saveHTML);
-ZEND_METHOD(DOM_HTML5Document, saveHTMLFile);
+ZEND_METHOD(DOM_HTMLDocument, __construct);
+ZEND_METHOD(DOM_HTMLDocument, createEmpty);
+ZEND_METHOD(DOM_HTMLDocument, createFromFile);
+ZEND_METHOD(DOM_HTMLDocument, createFromString);
+ZEND_METHOD(DOM_HTMLDocument, saveHTML);
+ZEND_METHOD(DOM_HTMLDocument, saveHTMLFile);
+ZEND_METHOD(DOM_XMLDocument, createEmpty);
+ZEND_METHOD(DOM_XMLDocument, createFromFile);
+ZEND_METHOD(DOM_XMLDocument, createFromString);
static const zend_function_entry ext_functions[] = {
@@ -1011,14 +1042,29 @@ static const zend_function_entry class_DOM_Document_methods[] = {
};
-static const zend_function_entry class_DOM_HTML5Document_methods[] = {
- ZEND_ME(DOM_HTML5Document, __construct, arginfo_class_DOM_HTML5Document___construct, ZEND_ACC_PUBLIC)
- ZEND_ME(DOM_HTML5Document, load, arginfo_class_DOM_HTML5Document_load, ZEND_ACC_PUBLIC)
- ZEND_ME(DOM_HTML5Document, loadXML, arginfo_class_DOM_HTML5Document_loadXML, ZEND_ACC_PUBLIC)
- ZEND_ME(DOM_HTML5Document, loadHTML, arginfo_class_DOM_HTML5Document_loadHTML, ZEND_ACC_PUBLIC)
- ZEND_ME(DOM_HTML5Document, loadHTMLFile, arginfo_class_DOM_HTML5Document_loadHTMLFile, ZEND_ACC_PUBLIC)
- ZEND_ME(DOM_HTML5Document, saveHTML, arginfo_class_DOM_HTML5Document_saveHTML, ZEND_ACC_PUBLIC)
- ZEND_ME(DOM_HTML5Document, saveHTMLFile, arginfo_class_DOM_HTML5Document_saveHTMLFile, ZEND_ACC_PUBLIC)
+static const zend_function_entry class_DOM_HTMLDocument_methods[] = {
+ ZEND_ME(DOM_HTMLDocument, __construct, arginfo_class_DOM_HTMLDocument___construct, ZEND_ACC_PRIVATE)
+ ZEND_ME(DOM_HTMLDocument, createEmpty, arginfo_class_DOM_HTMLDocument_createEmpty, ZEND_ACC_PUBLIC|ZEND_ACC_STATIC)
+ ZEND_ME(DOM_HTMLDocument, createFromFile, arginfo_class_DOM_HTMLDocument_createFromFile, ZEND_ACC_PUBLIC|ZEND_ACC_STATIC)
+ ZEND_ME(DOM_HTMLDocument, createFromString, arginfo_class_DOM_HTMLDocument_createFromString, ZEND_ACC_PUBLIC|ZEND_ACC_STATIC)
+ ZEND_MALIAS(DOMDocument, saveXML, saveXML, arginfo_class_DOM_HTMLDocument_saveXML, ZEND_ACC_PUBLIC)
+ ZEND_MALIAS(DOMDocument, saveXMLFile, save, arginfo_class_DOM_HTMLDocument_saveXMLFile, ZEND_ACC_PUBLIC)
+ ZEND_ME(DOM_HTMLDocument, saveHTML, arginfo_class_DOM_HTMLDocument_saveHTML, ZEND_ACC_PUBLIC)
+ ZEND_ME(DOM_HTMLDocument, saveHTMLFile, arginfo_class_DOM_HTMLDocument_saveHTMLFile, ZEND_ACC_PUBLIC)
+ ZEND_FE_END
+};
+
+
+static const zend_function_entry class_DOM_XMLDocument_methods[] = {
+ ZEND_MALIAS(DOM_HTMLDocument, __construct, __construct, arginfo_class_DOM_XMLDocument___construct, ZEND_ACC_PRIVATE)
+ ZEND_ME(DOM_XMLDocument, createEmpty, arginfo_class_DOM_XMLDocument_createEmpty, ZEND_ACC_PUBLIC|ZEND_ACC_STATIC)
+ ZEND_ME(DOM_XMLDocument, createFromFile, arginfo_class_DOM_XMLDocument_createFromFile, ZEND_ACC_PUBLIC|ZEND_ACC_STATIC)
+ ZEND_ME(DOM_XMLDocument, createFromString, arginfo_class_DOM_XMLDocument_createFromString, ZEND_ACC_PUBLIC|ZEND_ACC_STATIC)
+ ZEND_MALIAS(DOMDocument, createEntityReference, createEntityReference, arginfo_class_DOM_XMLDocument_createEntityReference, ZEND_ACC_PUBLIC)
+ ZEND_MALIAS(DOMDocument, validate, validate, arginfo_class_DOM_XMLDocument_validate, ZEND_ACC_PUBLIC)
+ ZEND_MALIAS(DOMDocument, xinclude, xinclude, arginfo_class_DOM_XMLDocument_xinclude, ZEND_ACC_PUBLIC)
+ ZEND_MALIAS(DOMDocument, saveXML, saveXML, arginfo_class_DOM_XMLDocument_saveXML, ZEND_ACC_PUBLIC)
+ ZEND_MALIAS(DOMDocument, saveXMLFile, save, arginfo_class_DOM_XMLDocument_saveXMLFile, ZEND_ACC_PUBLIC)
ZEND_FE_END
};
@@ -1312,8 +1358,8 @@ static zend_class_entry *register_class_DOMNode(void)
zval property_ownerDocument_default_value;
ZVAL_UNDEF(&property_ownerDocument_default_value);
zend_string *property_ownerDocument_name = zend_string_init("ownerDocument", sizeof("ownerDocument") - 1, 1);
- zend_string *property_ownerDocument_class_DOMDocument = zend_string_init("DOMDocument", sizeof("DOMDocument")-1, 1);
- zend_declare_typed_property(class_entry, property_ownerDocument_name, &property_ownerDocument_default_value, ZEND_ACC_PUBLIC, NULL, (zend_type) ZEND_TYPE_INIT_CLASS(property_ownerDocument_class_DOMDocument, 0, MAY_BE_NULL));
+ zend_string *property_ownerDocument_class_DOM_Document = zend_string_init("DOM\\Document", sizeof("DOM\\Document")-1, 1);
+ zend_declare_typed_property(class_entry, property_ownerDocument_name, &property_ownerDocument_default_value, ZEND_ACC_PUBLIC, NULL, (zend_type) ZEND_TYPE_INIT_CLASS(property_ownerDocument_class_DOM_Document, 0, MAY_BE_NULL));
zend_string_release(property_ownerDocument_name);
zval property_namespaceURI_default_value;
@@ -1429,7 +1475,6 @@ static zend_class_entry *register_class_DOMImplementation(void)
INIT_CLASS_ENTRY(ce, "DOMImplementation", class_DOMImplementation_methods);
class_entry = zend_register_internal_class_ex(&ce, NULL);
- zend_register_class_alias("DOM\\Implementation", class_entry);
return class_entry;
}
@@ -1641,6 +1686,13 @@ static zend_class_entry *register_class_DOMDocument(zend_class_entry *class_entr
INIT_CLASS_ENTRY(ce, "DOMDocument", class_DOMDocument_methods);
class_entry = zend_register_internal_class_ex(&ce, class_entry_DOM_Document);
+ zval property_implementation_default_value;
+ ZVAL_UNDEF(&property_implementation_default_value);
+ zend_string *property_implementation_name = zend_string_init("implementation", sizeof("implementation") - 1, 1);
+ zend_string *property_implementation_class_DOMImplementation = zend_string_init("DOMImplementation", sizeof("DOMImplementation")-1, 1);
+ zend_declare_typed_property(class_entry, property_implementation_name, &property_implementation_default_value, ZEND_ACC_PUBLIC, NULL, (zend_type) ZEND_TYPE_INIT_CLASS(property_implementation_class_DOMImplementation, 0, 0));
+ zend_string_release(property_implementation_name);
+
zval property_actualEncoding_default_value;
ZVAL_UNDEF(&property_actualEncoding_default_value);
zend_string *property_actualEncoding_name = zend_string_init("actualEncoding", sizeof("actualEncoding") - 1, 1);
@@ -1892,8 +1944,8 @@ static zend_class_entry *register_class_DOMXPath(void)
zval property_document_default_value;
ZVAL_UNDEF(&property_document_default_value);
zend_string *property_document_name = zend_string_init("document", sizeof("document") - 1, 1);
- zend_string *property_document_class_DOMDocument = zend_string_init("DOMDocument", sizeof("DOMDocument")-1, 1);
- zend_declare_typed_property(class_entry, property_document_name, &property_document_default_value, ZEND_ACC_PUBLIC, NULL, (zend_type) ZEND_TYPE_INIT_CLASS(property_document_class_DOMDocument, 0, 0));
+ zend_string *property_document_class_DOM_Document = zend_string_init("DOM\\Document", sizeof("DOM\\Document")-1, 1);
+ zend_declare_typed_property(class_entry, property_document_name, &property_document_default_value, ZEND_ACC_PUBLIC, NULL, (zend_type) ZEND_TYPE_INIT_CLASS(property_document_class_DOM_Document, 0, 0));
zend_string_release(property_document_name);
zval property_registerNodeNamespaces_default_value;
@@ -1922,13 +1974,6 @@ static zend_class_entry *register_class_DOM_Document(zend_class_entry *class_ent
zend_declare_typed_property(class_entry, property_doctype_name, &property_doctype_default_value, ZEND_ACC_PUBLIC, NULL, (zend_type) ZEND_TYPE_INIT_CLASS(property_doctype_class_DOM_DocumentType, 0, MAY_BE_NULL));
zend_string_release(property_doctype_name);
- zval property_implementation_default_value;
- ZVAL_UNDEF(&property_implementation_default_value);
- zend_string *property_implementation_name = zend_string_init("implementation", sizeof("implementation") - 1, 1);
- zend_string *property_implementation_class_DOM_Implementation = zend_string_init("DOM\\Implementation", sizeof("DOM\\Implementation")-1, 1);
- zend_declare_typed_property(class_entry, property_implementation_name, &property_implementation_default_value, ZEND_ACC_PUBLIC, NULL, (zend_type) ZEND_TYPE_INIT_CLASS(property_implementation_class_DOM_Implementation, 0, 0));
- zend_string_release(property_implementation_name);
-
zval property_documentElement_default_value;
ZVAL_UNDEF(&property_documentElement_default_value);
zend_string *property_documentElement_name = zend_string_init("documentElement", sizeof("documentElement") - 1, 1);
@@ -1977,12 +2022,90 @@ static zend_class_entry *register_class_DOM_Document(zend_class_entry *class_ent
return class_entry;
}
-static zend_class_entry *register_class_DOM_HTML5Document(zend_class_entry *class_entry_DOMDocument)
+static zend_class_entry *register_class_DOM_HTMLDocument(zend_class_entry *class_entry_DOM_DOM_Document)
{
zend_class_entry ce, *class_entry;
- INIT_NS_CLASS_ENTRY(ce, "DOM", "HTML5Document", class_DOM_HTML5Document_methods);
- class_entry = zend_register_internal_class_ex(&ce, class_entry_DOMDocument);
+ INIT_NS_CLASS_ENTRY(ce, "DOM", "HTMLDocument", class_DOM_HTMLDocument_methods);
+ class_entry = zend_register_internal_class_ex(&ce, class_entry_DOM_DOM_Document);
+ class_entry->ce_flags |= ZEND_ACC_FINAL;
+
+ return class_entry;
+}
+
+static zend_class_entry *register_class_DOM_XMLDocument(zend_class_entry *class_entry_DOM_DOM_Document)
+{
+ zend_class_entry ce, *class_entry;
+
+ INIT_NS_CLASS_ENTRY(ce, "DOM", "XMLDocument", class_DOM_XMLDocument_methods);
+ class_entry = zend_register_internal_class_ex(&ce, class_entry_DOM_DOM_Document);
+ class_entry->ce_flags |= ZEND_ACC_FINAL;
+
+ zval property_xmlEncoding_default_value;
+ ZVAL_UNDEF(&property_xmlEncoding_default_value);
+ zend_string *property_xmlEncoding_name = zend_string_init("xmlEncoding", sizeof("xmlEncoding") - 1, 1);
+ zend_declare_typed_property(class_entry, property_xmlEncoding_name, &property_xmlEncoding_default_value, ZEND_ACC_PUBLIC, NULL, (zend_type) ZEND_TYPE_INIT_MASK(MAY_BE_STRING|MAY_BE_NULL));
+ zend_string_release(property_xmlEncoding_name);
+
+ zval property_standalone_default_value;
+ ZVAL_UNDEF(&property_standalone_default_value);
+ zend_string *property_standalone_name = zend_string_init("standalone", sizeof("standalone") - 1, 1);
+ zend_declare_typed_property(class_entry, property_standalone_name, &property_standalone_default_value, ZEND_ACC_PUBLIC, NULL, (zend_type) ZEND_TYPE_INIT_MASK(MAY_BE_BOOL));
+ zend_string_release(property_standalone_name);
+
+ zval property_xmlStandalone_default_value;
+ ZVAL_UNDEF(&property_xmlStandalone_default_value);
+ zend_string *property_xmlStandalone_name = zend_string_init("xmlStandalone", sizeof("xmlStandalone") - 1, 1);
+ zend_declare_typed_property(class_entry, property_xmlStandalone_name, &property_xmlStandalone_default_value, ZEND_ACC_PUBLIC, NULL, (zend_type) ZEND_TYPE_INIT_MASK(MAY_BE_BOOL));
+ zend_string_release(property_xmlStandalone_name);
+
+ zval property_version_default_value;
+ ZVAL_UNDEF(&property_version_default_value);
+ zend_string *property_version_name = zend_string_init("version", sizeof("version") - 1, 1);
+ zend_declare_typed_property(class_entry, property_version_name, &property_version_default_value, ZEND_ACC_PUBLIC, NULL, (zend_type) ZEND_TYPE_INIT_MASK(MAY_BE_STRING|MAY_BE_NULL));
+ zend_string_release(property_version_name);
+
+ zval property_xmlVersion_default_value;
+ ZVAL_UNDEF(&property_xmlVersion_default_value);
+ zend_string *property_xmlVersion_name = zend_string_init("xmlVersion", sizeof("xmlVersion") - 1, 1);
+ zend_declare_typed_property(class_entry, property_xmlVersion_name, &property_xmlVersion_default_value, ZEND_ACC_PUBLIC, NULL, (zend_type) ZEND_TYPE_INIT_MASK(MAY_BE_STRING|MAY_BE_NULL));
+ zend_string_release(property_xmlVersion_name);
+
+ zval property_formatOutput_default_value;
+ ZVAL_UNDEF(&property_formatOutput_default_value);
+ zend_string *property_formatOutput_name = zend_string_init("formatOutput", sizeof("formatOutput") - 1, 1);
+ zend_declare_typed_property(class_entry, property_formatOutput_name, &property_formatOutput_default_value, ZEND_ACC_PUBLIC, NULL, (zend_type) ZEND_TYPE_INIT_MASK(MAY_BE_BOOL));
+ zend_string_release(property_formatOutput_name);
+
+ zval property_validateOnParse_default_value;
+ ZVAL_UNDEF(&property_validateOnParse_default_value);
+ zend_string *property_validateOnParse_name = zend_string_init("validateOnParse", sizeof("validateOnParse") - 1, 1);
+ zend_declare_typed_property(class_entry, property_validateOnParse_name, &property_validateOnParse_default_value, ZEND_ACC_PUBLIC, NULL, (zend_type) ZEND_TYPE_INIT_MASK(MAY_BE_BOOL));
+ zend_string_release(property_validateOnParse_name);
+
+ zval property_resolveExternals_default_value;
+ ZVAL_UNDEF(&property_resolveExternals_default_value);
+ zend_string *property_resolveExternals_name = zend_string_init("resolveExternals", sizeof("resolveExternals") - 1, 1);
+ zend_declare_typed_property(class_entry, property_resolveExternals_name, &property_resolveExternals_default_value, ZEND_ACC_PUBLIC, NULL, (zend_type) ZEND_TYPE_INIT_MASK(MAY_BE_BOOL));
+ zend_string_release(property_resolveExternals_name);
+
+ zval property_preserveWhiteSpace_default_value;
+ ZVAL_UNDEF(&property_preserveWhiteSpace_default_value);
+ zend_string *property_preserveWhiteSpace_name = zend_string_init("preserveWhiteSpace", sizeof("preserveWhiteSpace") - 1, 1);
+ zend_declare_typed_property(class_entry, property_preserveWhiteSpace_name, &property_preserveWhiteSpace_default_value, ZEND_ACC_PUBLIC, NULL, (zend_type) ZEND_TYPE_INIT_MASK(MAY_BE_BOOL));
+ zend_string_release(property_preserveWhiteSpace_name);
+
+ zval property_recover_default_value;
+ ZVAL_UNDEF(&property_recover_default_value);
+ zend_string *property_recover_name = zend_string_init("recover", sizeof("recover") - 1, 1);
+ zend_declare_typed_property(class_entry, property_recover_name, &property_recover_default_value, ZEND_ACC_PUBLIC, NULL, (zend_type) ZEND_TYPE_INIT_MASK(MAY_BE_BOOL));
+ zend_string_release(property_recover_name);
+
+ zval property_substituteEntities_default_value;
+ ZVAL_UNDEF(&property_substituteEntities_default_value);
+ zend_string *property_substituteEntities_name = zend_string_init("substituteEntities", sizeof("substituteEntities") - 1, 1);
+ zend_declare_typed_property(class_entry, property_substituteEntities_name, &property_substituteEntities_default_value, ZEND_ACC_PUBLIC, NULL, (zend_type) ZEND_TYPE_INIT_MASK(MAY_BE_BOOL));
+ zend_string_release(property_substituteEntities_name);
return class_entry;
}
diff --git a/ext/dom/tests/DOMDocument_load_error6.phpt b/ext/dom/tests/DOMDocument_load_error6.phpt
index 948780f189657..a4f9bc4a02a87 100644
--- a/ext/dom/tests/DOMDocument_load_error6.phpt
+++ b/ext/dom/tests/DOMDocument_load_error6.phpt
@@ -23,5 +23,5 @@ var_dump($dom->load(str_repeat(" ", PHP_MAXPATHLEN + 1)));
?>
--EXPECT--
DOMDocument::load(): Argument #1 ($filename) must not be empty
-Path to document must not contain any null bytes
+DOMDocument::load(): Argument #1 ($filename) must not contain any null bytes
bool(false)
diff --git a/ext/dom/tests/DOMDocument_relaxNGValidate_error2.phpt b/ext/dom/tests/DOMDocument_relaxNGValidate_error2.phpt
index 87d4c3317bd29..5dde5e8ced926 100644
--- a/ext/dom/tests/DOMDocument_relaxNGValidate_error2.phpt
+++ b/ext/dom/tests/DOMDocument_relaxNGValidate_error2.phpt
@@ -20,9 +20,9 @@ $result = $doc->relaxNGValidate($rng);
var_dump($result);
?>
--EXPECTF--
-Warning: DOM\Document::relaxNGValidate(): I/O warning : failed to load external entity "/home/niels/php-src/ext/dom/tests/foo.rng" in %s on line %d
+Warning: DOM\Document::relaxNGValidate(): I/O warning : failed to load external entity "%s/foo.rng" in %s on line %d
-Warning: DOM\Document::relaxNGValidate(): xmlRelaxNGParse: could not load /home/niels/php-src/ext/dom/tests/foo.rng in %s on line %d
+Warning: DOM\Document::relaxNGValidate(): xmlRelaxNGParse: could not load %s/foo.rng in %s on line %d
Warning: DOM\Document::relaxNGValidate(): Invalid RelaxNG in %s on line %d
bool(false)
diff --git a/ext/dom/tests/DOMDocument_schemaValidate_error1.phpt b/ext/dom/tests/DOMDocument_schemaValidate_error1.phpt
index b860007758caa..2923943a3c8f5 100644
--- a/ext/dom/tests/DOMDocument_schemaValidate_error1.phpt
+++ b/ext/dom/tests/DOMDocument_schemaValidate_error1.phpt
@@ -17,13 +17,13 @@ var_dump($result);
?>
--EXPECTF--
-Warning: DOM\Document::schemaValidate(): /home/niels/php-src/ext/dom/tests/book-not-a-schema.xsd:1: parser error : Start tag expected, '<' not found in %s on line %d
+Warning: DOM\Document::schemaValidate(): %s/book-not-a-schema.xsd:1: parser error : Start tag expected, '<' not found in %s on line %d
Warning: DOM\Document::schemaValidate(): Let's see what happens upon parsing a file that doesn't contain a schema. in %s on line %d
Warning: DOM\Document::schemaValidate(): ^ in %s on line %d
-Warning: DOM\Document::schemaValidate(): Failed to parse the XML resource '/home/niels/php-src/ext/dom/tests/book-not-a-schema.xsd'. in %s on line %d
+Warning: DOM\Document::schemaValidate(): Failed to parse the XML resource '%s/book-not-a-schema.xsd'. in %s on line %d
Warning: DOM\Document::schemaValidate(): Invalid Schema in %s on line %d
bool(false)
diff --git a/ext/dom/tests/DOMDocument_schemaValidate_error5.phpt b/ext/dom/tests/DOMDocument_schemaValidate_error5.phpt
index 888753302847d..2c179ed35bd45 100644
--- a/ext/dom/tests/DOMDocument_schemaValidate_error5.phpt
+++ b/ext/dom/tests/DOMDocument_schemaValidate_error5.phpt
@@ -17,9 +17,9 @@ var_dump($result);
?>
--EXPECTF--
-Warning: DOM\Document::schemaValidate(): I/O warning : failed to load external entity "/home/niels/php-src/ext/dom/tests/non-existent-file" in %s on line %d
+Warning: DOM\Document::schemaValidate(): I/O warning : failed to load external entity "%s/non-existent-file" in %s on line %d
-Warning: DOM\Document::schemaValidate(): Failed to locate the main schema resource at '/home/niels/php-src/ext/dom/tests/non-existent-file'. in %s on line %d
+Warning: DOM\Document::schemaValidate(): Failed to locate the main schema resource at '%s/non-existent-file'. in %s on line %d
Warning: DOM\Document::schemaValidate(): Invalid Schema in %s on line %d
bool(false)
diff --git a/ext/dom/tests/HTML5/encoding/Document_encoding_unicode_error.phpt b/ext/dom/tests/HTML5/encoding/Document_encoding_unicode_error.phpt
deleted file mode 100644
index 22c7a90fc30c6..0000000000000
--- a/ext/dom/tests/HTML5/encoding/Document_encoding_unicode_error.phpt
+++ /dev/null
@@ -1,27 +0,0 @@
---TEST--
-HTML5Document::loadHTML(File) with unicode codepoints resulting in an error
---EXTENSIONS--
-dom
---FILE--
-loadHTMLFile(__DIR__."/utf16le_error.html");
-echo "--- loadHTML ---\n";
-$dom->loadHTML(file_get_contents(__DIR__."/utf16le_error.html"));
-?>
---EXPECTF--
---- loadHTMLFile ---
-
-Warning: DOM\HTML5Document::loadHTMLFile(): tokenizer error missing-end-tag-name in %s, line: 7, column: 29 in %s on line %d
-
-Warning: DOM\HTML5Document::loadHTMLFile(): tree error unexpected-token in %s, line: 7, column: 14-17 in %s on line %d
-
-Warning: DOM\HTML5Document::loadHTMLFile(): tree error unexpected-token in %s, line: 8, column: 7-10 in %s on line %d
---- loadHTML ---
-
-Warning: DOM\HTML5Document::loadHTML(): tokenizer error missing-end-tag-name in Entity, line: 7, column: 29 in %s on line %d
-
-Warning: DOM\HTML5Document::loadHTML(): tree error unexpected-token in Entity, line: 7, column: 14-17 in %s on line %d
-
-Warning: DOM\HTML5Document::loadHTML(): tree error unexpected-token in Entity, line: 8, column: 7-10 in %s on line %d
diff --git a/ext/dom/tests/HTML5/encoding/Document_load_different_encoding.phpt b/ext/dom/tests/HTML5/encoding/Document_load_different_encoding.phpt
deleted file mode 100644
index 866e9223f9ef8..0000000000000
--- a/ext/dom/tests/HTML5/encoding/Document_load_different_encoding.phpt
+++ /dev/null
@@ -1,19 +0,0 @@
---TEST--
-DOM\HTML5Document load document with different encoding
---EXTENSIONS--
-dom
---FILE--
-loadHTMLFile(__DIR__ . "/windows1251.html");
-var_dump($dom->encoding);
-$dom->loadHTML("hé
", LIBXML_NOERROR);
-var_dump($dom->encoding);
-echo $dom->saveHTML();
-
-?>
---EXPECT--
-string(12) "windows-1251"
-string(5) "UTF-8"
-hé
diff --git a/ext/dom/tests/HTML5/interactions/Document_clone.phpt b/ext/dom/tests/HTML5/interactions/Document_clone.phpt
deleted file mode 100644
index 42c9e8bb9f788..0000000000000
--- a/ext/dom/tests/HTML5/interactions/Document_clone.phpt
+++ /dev/null
@@ -1,31 +0,0 @@
---TEST--
-Cloning a DOM\HTML5Document
---EXTENSIONS--
-dom
---FILE--
-loadHTML("foo
");
-
-$dom2 = clone $dom;
-var_dump($dom2->firstChild->tagName);
-var_dump($dom2->firstChild->textContent);
-$dom2->loadHTML("bar ");
-var_dump($dom2->firstChild->tagName);
-var_dump($dom2->firstChild->textContent);
-
-$element = $dom2->firstChild;
-unset($dom2);
-var_dump(get_class($element->ownerDocument));
-
-?>
---EXPECTF--
-Warning: DOM\HTML5Document::loadHTML(): tree error unexpected-token-in-initial-mode in Entity, line: 1, column: 2 in %s on line %d
-string(4) "html"
-string(3) "foo"
-
-Warning: DOM\HTML5Document::loadHTML(): tree error unexpected-token-in-initial-mode in Entity, line: 1, column: 2-7 in %s on line %d
-string(4) "html"
-string(3) "bar"
-string(17) "DOM\HTML5Document"
diff --git a/ext/dom/tests/HTML5/interactions/Document_registerNodeClass_01.phpt b/ext/dom/tests/HTML5/interactions/Document_registerNodeClass_01.phpt
deleted file mode 100644
index 398d80393b323..0000000000000
--- a/ext/dom/tests/HTML5/interactions/Document_registerNodeClass_01.phpt
+++ /dev/null
@@ -1,28 +0,0 @@
---TEST--
-DOM\HTML5Document::registerNodeClass 01
---EXTENSIONS--
-dom
---FILE--
-firstChild->textContent;
- }
-}
-
-$dom = new DOM\HTML5Document();
-$dom->registerNodeClass("DOM\HTML5Document", "CustomDOMHTML5Document");
-$dom->loadHTML("foo
", LIBXML_NOERROR);
-
-$element = $dom->documentElement;
-unset($dom);
-
-$dom = $element->ownerDocument;
-var_dump($dom instanceof CustomDOMHTML5Document);
-var_dump($dom->test());
-
-?>
---EXPECT--
-bool(true)
-string(3) "foo"
diff --git a/ext/dom/tests/HTML5/interactions/Document_registerNodeClass_03.phpt b/ext/dom/tests/HTML5/interactions/Document_registerNodeClass_03.phpt
deleted file mode 100644
index 10609846de093..0000000000000
--- a/ext/dom/tests/HTML5/interactions/Document_registerNodeClass_03.phpt
+++ /dev/null
@@ -1,26 +0,0 @@
---TEST--
-DOM\HTML5Document::registerNodeClass 03
---EXTENSIONS--
-dom
---FILE--
-registerNodeClass("DOMDocument", "DOM\\HTML5Document");
-
-$element = $dom->appendChild($dom->createElement("foo"));
-unset($dom);
-
-var_dump(get_class($element->ownerDocument));
-
-$dom = $element->ownerDocument;
-unset($element);
-$element = $dom->documentElement;
-unset($dom);
-
-var_dump(get_class($element->ownerDocument));
-
-?>
---EXPECT--
-string(17) "DOM\HTML5Document"
-string(17) "DOM\HTML5Document"
diff --git a/ext/dom/tests/HTML5/parser/Document_loadHTMLFile_empty_path.phpt b/ext/dom/tests/HTML5/parser/Document_loadHTMLFile_empty_path.phpt
deleted file mode 100644
index 6b71390fe183b..0000000000000
--- a/ext/dom/tests/HTML5/parser/Document_loadHTMLFile_empty_path.phpt
+++ /dev/null
@@ -1,19 +0,0 @@
---TEST--
-DOM\HTML5Document::loadHTMLFile() - empty path
---EXTENSIONS--
-dom
---FILE--
-loadHTMLFile("");
-echo $dom->saveHTML(), "\n";
-
-?>
---EXPECTF--
-Fatal error: Uncaught ValueError: Path cannot be empty in %s:%d
-Stack trace:
-#0 %s(%d): DOM\HTML5Document->loadHTMLFile('')
-#1 {main}
- thrown in %s on line %d
diff --git a/ext/dom/tests/HTML5/parser/Document_loadHTMLFile_local_existing_file.phpt b/ext/dom/tests/HTML5/parser/Document_loadHTMLFile_local_existing_file.phpt
deleted file mode 100644
index 5bb31af73ab12..0000000000000
--- a/ext/dom/tests/HTML5/parser/Document_loadHTMLFile_local_existing_file.phpt
+++ /dev/null
@@ -1,24 +0,0 @@
---TEST--
-DOM\HTML5Document::loadHTMLFile() - local existing file
---EXTENSIONS--
-dom
---FILE--
-loadHTMLFile(__DIR__ . "/../../test.html");
-echo $dom->saveHTML(), "\n";
-
-?>
---EXPECTF--
-Warning: DOM\HTML5Document::loadHTMLFile(): tree error unexpected-token-in-initial-mode in %s on line %d
-
-Hello world
-
-
-This is a not well-formed
-html files with undeclared entities
-
-
-
diff --git a/ext/dom/tests/HTML5/parser/Document_loadHTMLFile_local_file_does_not_exist.phpt b/ext/dom/tests/HTML5/parser/Document_loadHTMLFile_local_file_does_not_exist.phpt
deleted file mode 100644
index 9bc624a8d9109..0000000000000
--- a/ext/dom/tests/HTML5/parser/Document_loadHTMLFile_local_file_does_not_exist.phpt
+++ /dev/null
@@ -1,15 +0,0 @@
---TEST--
-DOM\HTML5Document::loadHTMLFile() - local file that does not exist
---EXTENSIONS--
-dom
---FILE--
-loadHTMLFile(__DIR__ . "/foobar");
-echo $dom->saveHTML(), "\n";
-
-?>
---EXPECTF--
-Warning: DOM\HTML5Document::loadHTMLFile(%s): Failed to open stream: No such file or directory in %s on line %d
diff --git a/ext/dom/tests/HTML5/parser/Document_loadHTMLFile_nul_terminator_cases_path.phpt b/ext/dom/tests/HTML5/parser/Document_loadHTMLFile_nul_terminator_cases_path.phpt
deleted file mode 100644
index cdad0bf1c28a2..0000000000000
--- a/ext/dom/tests/HTML5/parser/Document_loadHTMLFile_nul_terminator_cases_path.phpt
+++ /dev/null
@@ -1,21 +0,0 @@
---TEST--
-DOM\HTML5Document::loadHTMLFile() - NUL terminator cases path
---EXTENSIONS--
-dom
---FILE--
-loadHTMLFile("\0");
-} catch (Error $e) {
- echo $e->getMessage(), "\n";
-}
-$dom->loadHTMLFile('%00');
-
-?>
---EXPECTF--
-DOM\HTML5Document::loadHTMLFile(): Argument #1 ($filename) must not contain any null bytes
-
-Warning: DOM\HTML5Document::loadHTMLFile(): URI must not contain percent-encoded NUL bytes in %s on line %d
diff --git a/ext/dom/tests/HTML5/parser/Document_loadHTML_parser_warning_01.phpt b/ext/dom/tests/HTML5/parser/Document_loadHTML_parser_warning_01.phpt
deleted file mode 100644
index 7f7bef7ee3e97..0000000000000
--- a/ext/dom/tests/HTML5/parser/Document_loadHTML_parser_warning_01.phpt
+++ /dev/null
@@ -1,24 +0,0 @@
---TEST--
-DOM\HTML5Document::loadHTML() - parser warning 01
---EXTENSIONS--
-dom
---FILE--
-loadHTML($html);
-echo $dom->saveHTML(), "\n";
-
-?>
---EXPECTF--
-Warning: DOM\HTML5Document::loadHTML(): tokenizer error missing-end-tag-name in Entity, line: 7, column: 11 in %s on line %d
-
-Warning: DOM\HTML5Document::loadHTML(): tree error unexpected-token-in-initial-mode in Entity, line: 1, column: 2-6 in %s on line %d
-foo
-
-
-
-
-
-error
diff --git a/ext/dom/tests/HTML5/parser/Document_loadHTML_parser_warning_02.phpt b/ext/dom/tests/HTML5/parser/Document_loadHTML_parser_warning_02.phpt
deleted file mode 100644
index 85639e249f901..0000000000000
--- a/ext/dom/tests/HTML5/parser/Document_loadHTML_parser_warning_02.phpt
+++ /dev/null
@@ -1,33 +0,0 @@
---TEST--
-DOM\HTML5Document::loadHTML() - parser warning 02
---EXTENSIONS--
-dom
---FILE--
-loadHTML($html);
-echo $dom->saveHTML(), "\n";
-
-?>
---EXPECTF--
-Warning: DOM\HTML5Document::loadHTML(): tokenizer error unexpected-null-character in Entity, line: 4, column: 11 in %s on line %d
-
-Warning: DOM\HTML5Document::loadHTML(): tokenizer error missing-whitespace-between-attributes in Entity, line: 5, column: 20 in %s on line %d
-
-Warning: DOM\HTML5Document::loadHTML(): tokenizer error incorrectly-opened-comment in Entity, line: 6, column: 11 in %s on line %d
-
-Warning: DOM\HTML5Document::loadHTML(): tokenizer error nested-comment in Entity, line: 7, column: 18 in %s on line %d
-
-Warning: DOM\HTML5Document::loadHTML(): tree error unexpected-closed-token in Entity, line: 4, column: 18 in %s on line %d
-
-Warning: DOM\HTML5Document::loadHTML(): tree error doctype-token-in-body-mode in Entity, line: 8, column: 11-17 in %s on line %d
-
- foo
-
-
- -->
-
-
-
diff --git a/ext/dom/tests/HTML5/parser/Document_load_options.phpt b/ext/dom/tests/HTML5/parser/Document_load_options.phpt
deleted file mode 100644
index 933637cf7a975..0000000000000
--- a/ext/dom/tests/HTML5/parser/Document_load_options.phpt
+++ /dev/null
@@ -1,109 +0,0 @@
---TEST--
-HTML5Document: loading $options check
---EXTENSIONS--
-dom
---FILE--
-{$method}("x", $options);
- } catch (ValueError $e) {
- echo $e->getMessage(), "\n";
- }
- }
-}
-
-?>
---EXPECTF--
---- Method loadHTML ---
-int(%d)
-DOM\HTML5Document::loadHTML(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
-int(4194304)
-DOM\HTML5Document::loadHTML(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
-int(524288)
-DOM\HTML5Document::loadHTML(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
-int(8)
-DOM\HTML5Document::loadHTML(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
-int(4)
-DOM\HTML5Document::loadHTML(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
-int(16)
-DOM\HTML5Document::loadHTML(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
-int(4)
-DOM\HTML5Document::loadHTML(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
-int(256)
-DOM\HTML5Document::loadHTML(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
-int(16384)
-DOM\HTML5Document::loadHTML(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
-int(4)
-DOM\HTML5Document::loadHTML(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
-int(2)
-DOM\HTML5Document::loadHTML(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
-int(1024)
-DOM\HTML5Document::loadHTML(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
-int(1)
-DOM\HTML5Document::loadHTML(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
-int(2048)
-DOM\HTML5Document::loadHTML(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
-int(64)
-DOM\HTML5Document::loadHTML(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
-int(128)
-DOM\HTML5Document::loadHTML(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
---- Method loadHTMLFile ---
-int(%d)
-DOM\HTML5Document::loadHTMLFile(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
-int(4194304)
-DOM\HTML5Document::loadHTMLFile(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
-int(524288)
-DOM\HTML5Document::loadHTMLFile(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
-int(8)
-DOM\HTML5Document::loadHTMLFile(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
-int(4)
-DOM\HTML5Document::loadHTMLFile(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
-int(16)
-DOM\HTML5Document::loadHTMLFile(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
-int(4)
-DOM\HTML5Document::loadHTMLFile(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
-int(256)
-DOM\HTML5Document::loadHTMLFile(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
-int(16384)
-DOM\HTML5Document::loadHTMLFile(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
-int(4)
-DOM\HTML5Document::loadHTMLFile(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
-int(2)
-DOM\HTML5Document::loadHTMLFile(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
-int(1024)
-DOM\HTML5Document::loadHTMLFile(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
-int(1)
-DOM\HTML5Document::loadHTMLFile(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
-int(2048)
-DOM\HTML5Document::loadHTMLFile(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
-int(64)
-DOM\HTML5Document::loadHTMLFile(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
-int(128)
-DOM\HTML5Document::loadHTMLFile(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
diff --git a/ext/dom/tests/domobject_debug_handler.phpt b/ext/dom/tests/domobject_debug_handler.phpt
index a6e04a58d0bc5..c97655a63a14d 100644
--- a/ext/dom/tests/domobject_debug_handler.phpt
+++ b/ext/dom/tests/domobject_debug_handler.phpt
@@ -20,8 +20,12 @@ object(DOMDocument)#1 (41) {
["dynamicProperty"]=>
object(stdClass)#2 (0) {
}
+ ["implementation"]=>
+ string(22) "(object value omitted)"
["actualEncoding"]=>
NULL
+ ["config"]=>
+ NULL
["encoding"]=>
NULL
["xmlEncoding"]=>
@@ -34,8 +38,6 @@ object(DOMDocument)#1 (41) {
string(3) "1.0"
["xmlVersion"]=>
string(3) "1.0"
- ["config"]=>
- NULL
["formatOutput"]=>
bool(false)
["validateOnParse"]=>
@@ -50,8 +52,6 @@ object(DOMDocument)#1 (41) {
bool(false)
["doctype"]=>
NULL
- ["implementation"]=>
- string(22) "(object value omitted)"
["documentElement"]=>
string(22) "(object value omitted)"
["strictErrorChecking"]=>
diff --git a/ext/dom/tests/HTML5/encoding/Document_GB18030.phpt b/ext/dom/tests/modern/html/encoding/HTMLDocument_GB18030.phpt
similarity index 86%
rename from ext/dom/tests/HTML5/encoding/Document_GB18030.phpt
rename to ext/dom/tests/modern/html/encoding/HTMLDocument_GB18030.phpt
index 01c347a08a5f7..c0139661e3077 100644
--- a/ext/dom/tests/HTML5/encoding/Document_GB18030.phpt
+++ b/ext/dom/tests/modern/html/encoding/HTMLDocument_GB18030.phpt
@@ -1,12 +1,11 @@
--TEST--
-DOM\HTML5Document GB18030 encoding test
+DOM\HTMLDocument GB18030 encoding test
--EXTENSIONS--
dom
--FILE--
loadHTMLFile(__DIR__ . "/gb18030.html");
+$dom = DOM\HTMLDocument::createFromFile(__DIR__ . "/gb18030.html");
var_dump($dom->encoding);
$dom->documentElement->firstChild->nextElementSibling->textContent = "é";
$output = $dom->saveHTML();
diff --git a/ext/dom/tests/HTML5/encoding/Document_Shift_JIS.phpt b/ext/dom/tests/modern/html/encoding/HTMLDocument_Shift_JIS.phpt
similarity index 86%
rename from ext/dom/tests/HTML5/encoding/Document_Shift_JIS.phpt
rename to ext/dom/tests/modern/html/encoding/HTMLDocument_Shift_JIS.phpt
index 8a31721957bbb..c021d7022f275 100644
--- a/ext/dom/tests/HTML5/encoding/Document_Shift_JIS.phpt
+++ b/ext/dom/tests/modern/html/encoding/HTMLDocument_Shift_JIS.phpt
@@ -1,12 +1,11 @@
--TEST--
-DOM\HTML5Document Shift JIS encoding test
+DOM\HTMLDocument Shift JIS encoding test
--EXTENSIONS--
dom
--FILE--
loadHTMLFile(__DIR__ . "/shift_jis.html");
+$dom = DOM\HTMLDocument::createFromFile(__DIR__ . "/shift_jis.html");
var_dump($dom->encoding);
$dom->documentElement->firstChild->nextElementSibling->textContent .= "é";
$output = $dom->saveHTML();
diff --git a/ext/dom/tests/HTML5/encoding/Document_UTF16BE_BOM.phpt b/ext/dom/tests/modern/html/encoding/HTMLDocument_UTF16BE_BOM.phpt
similarity index 89%
rename from ext/dom/tests/HTML5/encoding/Document_UTF16BE_BOM.phpt
rename to ext/dom/tests/modern/html/encoding/HTMLDocument_UTF16BE_BOM.phpt
index 7a05541f89ff8..9e10859f1914c 100644
--- a/ext/dom/tests/HTML5/encoding/Document_UTF16BE_BOM.phpt
+++ b/ext/dom/tests/modern/html/encoding/HTMLDocument_UTF16BE_BOM.phpt
@@ -1,12 +1,11 @@
--TEST--
-DOM\HTML5Document UTF-16BE BOM encoding test
+DOM\HTMLDocument UTF-16BE BOM encoding test
--EXTENSIONS--
dom
--FILE--
loadHTMLFile(__DIR__ . "/utf16be_bom.html");
+$dom = DOM\HTMLDocument::createFromFile(__DIR__ . "/utf16be_bom.html");
var_dump($dom->encoding);
$dom->documentElement->firstChild->nextElementSibling->textContent = "é";
$output = $dom->saveHTML();
diff --git a/ext/dom/tests/HTML5/encoding/Document_UTF16LE_BOM.phpt b/ext/dom/tests/modern/html/encoding/HTMLDocument_UTF16LE_BOM.phpt
similarity index 89%
rename from ext/dom/tests/HTML5/encoding/Document_UTF16LE_BOM.phpt
rename to ext/dom/tests/modern/html/encoding/HTMLDocument_UTF16LE_BOM.phpt
index c713a50846792..7006163e3de07 100644
--- a/ext/dom/tests/HTML5/encoding/Document_UTF16LE_BOM.phpt
+++ b/ext/dom/tests/modern/html/encoding/HTMLDocument_UTF16LE_BOM.phpt
@@ -1,12 +1,11 @@
--TEST--
-DOM\HTML5Document UTF-16LE BOM encoding test
+DOM\HTMLDocument UTF-16LE BOM encoding test
--EXTENSIONS--
dom
--FILE--
loadHTMLFile(__DIR__ . "/utf16le_bom.html");
+$dom = DOM\HTMLDocument::createFromFile(__DIR__ . "/utf16le_bom.html");
var_dump($dom->encoding);
$dom->documentElement->firstChild->nextElementSibling->textContent = "é";
$output = $dom->saveHTML();
diff --git a/ext/dom/tests/HTML5/encoding/Document_UTF8_BOM.phpt b/ext/dom/tests/modern/html/encoding/HTMLDocument_UTF8_BOM.phpt
similarity index 86%
rename from ext/dom/tests/HTML5/encoding/Document_UTF8_BOM.phpt
rename to ext/dom/tests/modern/html/encoding/HTMLDocument_UTF8_BOM.phpt
index f6901198f0327..65326ad6b0370 100644
--- a/ext/dom/tests/HTML5/encoding/Document_UTF8_BOM.phpt
+++ b/ext/dom/tests/modern/html/encoding/HTMLDocument_UTF8_BOM.phpt
@@ -1,12 +1,11 @@
--TEST--
-DOM\HTML5Document UTF-8 BOM encoding test
+DOM\HTMLDocument UTF-8 BOM encoding test
--EXTENSIONS--
dom
--FILE--
loadHTMLFile(__DIR__ . "/utf8_bom.html");
+$dom = DOM\HTMLDocument::createFromFile(__DIR__ . "/utf8_bom.html");
var_dump($dom->encoding);
$dom->documentElement->firstChild->nextElementSibling->textContent = "é";
$output = $dom->saveHTML();
diff --git a/ext/dom/tests/HTML5/encoding/Document_Windows1251.phpt b/ext/dom/tests/modern/html/encoding/HTMLDocument_Windows1251.phpt
similarity index 87%
rename from ext/dom/tests/HTML5/encoding/Document_Windows1251.phpt
rename to ext/dom/tests/modern/html/encoding/HTMLDocument_Windows1251.phpt
index f631584f2f8df..5b85639996d9c 100644
--- a/ext/dom/tests/HTML5/encoding/Document_Windows1251.phpt
+++ b/ext/dom/tests/modern/html/encoding/HTMLDocument_Windows1251.phpt
@@ -1,12 +1,11 @@
--TEST--
-DOM\HTML5Document Windows-1251 encoding test
+DOM\HTMLDocument Windows-1251 encoding test
--EXTENSIONS--
dom
--FILE--
loadHTMLFile(__DIR__ . "/windows1251.html");
+$dom = DOM\HTMLDocument::createFromFile(__DIR__ . "/windows1251.html");
var_dump($dom->encoding);
$dom->documentElement->firstChild->nextElementSibling->textContent .= "é"; // Note: won't show up in Windows 1251 because it doesn't exist there
$output = $dom->saveHTML();
diff --git a/ext/dom/tests/HTML5/encoding/Document_encoding_edge_case_01.phpt b/ext/dom/tests/modern/html/encoding/HTMLDocument_encoding_edge_case_01.phpt
similarity index 98%
rename from ext/dom/tests/HTML5/encoding/Document_encoding_edge_case_01.phpt
rename to ext/dom/tests/modern/html/encoding/HTMLDocument_encoding_edge_case_01.phpt
index 069c46c162247..3988767e58064 100644
--- a/ext/dom/tests/HTML5/encoding/Document_encoding_edge_case_01.phpt
+++ b/ext/dom/tests/modern/html/encoding/HTMLDocument_encoding_edge_case_01.phpt
@@ -1,13 +1,13 @@
--TEST--
-DOM\HTML5Document edge case encoding 01
+DOM\HTMLDocument edge case encoding 01
--EXTENSIONS--
dom
--FILE--
UTF-8
-$dom = new DOM\HTML5Document();
// Create a UTF-8 string where a UTF-8 byte sequence falls over the boundary of the 4096 byte buffer
+$dom = DOM\HTMLDocument::createEmpty();
$dom->append(str_repeat("A", 4096 - 2) . "\xf0\x90\x8d\x88AA");
var_dump($dom->saveHTML());
diff --git a/ext/dom/tests/HTML5/encoding/Document_encoding_edge_case_02.phpt b/ext/dom/tests/modern/html/encoding/HTMLDocument_encoding_edge_case_02.phpt
similarity index 97%
rename from ext/dom/tests/HTML5/encoding/Document_encoding_edge_case_02.phpt
rename to ext/dom/tests/modern/html/encoding/HTMLDocument_encoding_edge_case_02.phpt
index 3c6cf0e715547..5ac5694a89dc3 100644
--- a/ext/dom/tests/HTML5/encoding/Document_encoding_edge_case_02.phpt
+++ b/ext/dom/tests/modern/html/encoding/HTMLDocument_encoding_edge_case_02.phpt
@@ -1,13 +1,12 @@
--TEST--
-DOM\HTML5Document edge case encoding 02
+DOM\HTMLDocument edge case encoding 02
--EXTENSIONS--
dom
--FILE--
GB18030
-$dom = new DOM\HTML5Document();
-$dom->encoding = "GB18030";
+$dom = DOM\HTMLDocument::createEmpty("GB18030");
// Create a UTF-8 string where a UTF-8 byte sequence falls over the boundary of the 4096 byte buffer
// *and* the sequence also falls over the boundary for the result
$dom->append(str_repeat("A", 4096 - 2) . "\xf0\x90\x8d\x88AA");
diff --git a/ext/dom/tests/HTML5/encoding/Document_encoding_edge_case_03.phpt b/ext/dom/tests/modern/html/encoding/HTMLDocument_encoding_edge_case_03.phpt
similarity index 97%
rename from ext/dom/tests/HTML5/encoding/Document_encoding_edge_case_03.phpt
rename to ext/dom/tests/modern/html/encoding/HTMLDocument_encoding_edge_case_03.phpt
index 47ee2fc8b8b32..d68257ca80fee 100644
--- a/ext/dom/tests/HTML5/encoding/Document_encoding_edge_case_03.phpt
+++ b/ext/dom/tests/modern/html/encoding/HTMLDocument_encoding_edge_case_03.phpt
@@ -1,13 +1,12 @@
--TEST--
-DOM\HTML5Document edge case encoding 03
+DOM\HTMLDocument edge case encoding 03
--EXTENSIONS--
dom
--FILE--
GB18030
-$dom = new DOM\HTML5Document();
-$dom->encoding = "GB18030";
+$dom = DOM\HTMLDocument::createEmpty("GB18030");
// Create a UTF-8 string where an invalid UTF-8 byte sequence falls over the boundary of the 4096 byte buffer
// Note: the strange ?1?7 sequence is the GB18030 encoding for the unicode replacement character
$dom->append(str_repeat("A", 4096 - 2) . "\xff\xff\xff");
diff --git a/ext/dom/tests/HTML5/encoding/Document_encoding_edge_case_04.phpt b/ext/dom/tests/modern/html/encoding/HTMLDocument_encoding_edge_case_04.phpt
similarity index 98%
rename from ext/dom/tests/HTML5/encoding/Document_encoding_edge_case_04.phpt
rename to ext/dom/tests/modern/html/encoding/HTMLDocument_encoding_edge_case_04.phpt
index d3a16bde718ca..ccff90a83320f 100644
--- a/ext/dom/tests/HTML5/encoding/Document_encoding_edge_case_04.phpt
+++ b/ext/dom/tests/modern/html/encoding/HTMLDocument_encoding_edge_case_04.phpt
@@ -1,12 +1,12 @@
--TEST--
-DOM\HTML5Document edge case encoding 04
+DOM\HTMLDocument edge case encoding 04
--EXTENSIONS--
dom
--FILE--
UTF-8
-$dom = new DOM\HTML5Document();
+$dom = DOM\HTMLDocument::createEmpty();
// Create a UTF-8 string where an invalid UTF-8 byte sequence falls over the boundary of the 4096 byte buffer
$dom->append(str_repeat("A", 4096 - 2) . "\xff\xff\xff");
var_dump($dom->saveHTML());
diff --git a/ext/dom/tests/HTML5/encoding/Document_encoding_edge_case_05.phpt b/ext/dom/tests/modern/html/encoding/HTMLDocument_encoding_edge_case_05.phpt
similarity index 96%
rename from ext/dom/tests/HTML5/encoding/Document_encoding_edge_case_05.phpt
rename to ext/dom/tests/modern/html/encoding/HTMLDocument_encoding_edge_case_05.phpt
index 747789398aaf4..e7da1e439b70a 100644
--- a/ext/dom/tests/HTML5/encoding/Document_encoding_edge_case_05.phpt
+++ b/ext/dom/tests/modern/html/encoding/HTMLDocument_encoding_edge_case_05.phpt
@@ -1,17 +1,16 @@
--TEST--
-DOM\HTML5Document edge case encoding 05
+DOM\HTMLDocument edge case encoding 05
--EXTENSIONS--
dom
--FILE--
UTF-8
-$dom = new DOM\HTML5Document();
$header = " ";
$padding_required_until_4094 = 4094 - strlen($header);
-// GB18030 byte sequence crossing the 4096 boundary
$trailer = "\x90\x30\xd5\x30";
-$dom->loadHTML($header . str_repeat("A", $padding_required_until_4094) . $trailer);
+$dom = DOM\HTMLDocument::createFromString($header . str_repeat("A", $padding_required_until_4094) . $trailer);
+// GB18030 byte sequence crossing the 4096 boundary
var_dump($dom->encoding);
$dom->encoding = "UTF-8";
var_dump($dom->saveHTML());
diff --git a/ext/dom/tests/HTML5/encoding/Document_encoding_edge_case_06.phpt b/ext/dom/tests/modern/html/encoding/HTMLDocument_encoding_edge_case_06.phpt
similarity index 98%
rename from ext/dom/tests/HTML5/encoding/Document_encoding_edge_case_06.phpt
rename to ext/dom/tests/modern/html/encoding/HTMLDocument_encoding_edge_case_06.phpt
index 633a48329d70e..0b472b466fbed 100644
--- a/ext/dom/tests/HTML5/encoding/Document_encoding_edge_case_06.phpt
+++ b/ext/dom/tests/modern/html/encoding/HTMLDocument_encoding_edge_case_06.phpt
@@ -1,12 +1,12 @@
--TEST--
-DOM\HTML5Document edge case encoding 06
+DOM\HTMLDocument edge case encoding 06
--EXTENSIONS--
dom
--FILE--
UTF-8
-$dom = new DOM\HTML5Document();
+$dom = DOM\HTMLDocument::createEmpty();
// Create a UTF-8 string where a *broken* UTF-8 byte sequence falls over the boundary of the 4096 byte buffer
$dom->append(str_repeat("A", 4096 - 1) . "\xf0\x90");
var_dump($dom->saveHTML());
diff --git a/ext/dom/tests/HTML5/encoding/Document_encoding_edge_case_07.phpt b/ext/dom/tests/modern/html/encoding/HTMLDocument_encoding_edge_case_07.phpt
similarity index 96%
rename from ext/dom/tests/HTML5/encoding/Document_encoding_edge_case_07.phpt
rename to ext/dom/tests/modern/html/encoding/HTMLDocument_encoding_edge_case_07.phpt
index 9758da6a7d12f..6cf2d1d8f5e4b 100644
--- a/ext/dom/tests/HTML5/encoding/Document_encoding_edge_case_07.phpt
+++ b/ext/dom/tests/modern/html/encoding/HTMLDocument_encoding_edge_case_07.phpt
@@ -1,17 +1,16 @@
--TEST--
-DOM\HTML5Document edge case encoding 07
+DOM\HTMLDocument edge case encoding 07
--EXTENSIONS--
dom
--FILE--
UTF-8
-$dom = new DOM\HTML5Document();
$header = " ";
$padding_required_until_4095 = 4095 - strlen($header);
-// GB18030 *broken* byte sequence crossing the 4096 boundary
$trailer = "\x90\x30";
-$dom->loadHTML($header . str_repeat("A", $padding_required_until_4095) . $trailer);
+$dom = DOM\HTMLDocument::createFromString($header . str_repeat("A", $padding_required_until_4095) . $trailer);
+// GB18030 *broken* byte sequence crossing the 4096 boundary
var_dump($dom->encoding);
$dom->encoding = "UTF-8";
var_dump($dom->saveHTML());
diff --git a/ext/dom/tests/HTML5/encoding/Document_encoding_field_test.phpt b/ext/dom/tests/modern/html/encoding/HTMLDocument_encoding_field_test.phpt
similarity index 65%
rename from ext/dom/tests/HTML5/encoding/Document_encoding_field_test.phpt
rename to ext/dom/tests/modern/html/encoding/HTMLDocument_encoding_field_test.phpt
index c47c4b56c1980..dd9251608af84 100644
--- a/ext/dom/tests/HTML5/encoding/Document_encoding_field_test.phpt
+++ b/ext/dom/tests/modern/html/encoding/HTMLDocument_encoding_field_test.phpt
@@ -1,11 +1,11 @@
--TEST--
-DOM\HTML5Document test values for encoding field
+DOM\HTMLDocument test values for encoding field
--EXTENSIONS--
dom
--FILE--
encoding);
$dom->encoding = "CSeuckr";
var_dump($dom->encoding);
@@ -25,12 +25,19 @@ try {
var_dump($dom->encoding);
echo $dom->saveHTML();
+try {
+ $dom = DOM\HTMLDocument::createEmpty("bogus");
+} catch (ValueError $e) {
+ echo $e->getMessage(), "\n";
+}
+
?>
--EXPECT--
-NULL
+string(5) "UTF-8"
string(6) "EUC-KR"
Invalid document encoding
string(6) "EUC-KR"
string(12) "windows-1251"
Invalid document encoding
string(12) "windows-1251"
+DOM\HTMLDocument::createEmpty(): Argument #1 ($encoding) is not a valid document encoding
diff --git a/ext/dom/tests/modern/html/encoding/HTMLDocument_encoding_unicode_error.phpt b/ext/dom/tests/modern/html/encoding/HTMLDocument_encoding_unicode_error.phpt
new file mode 100644
index 0000000000000..7885f68ecf50b
--- /dev/null
+++ b/ext/dom/tests/modern/html/encoding/HTMLDocument_encoding_unicode_error.phpt
@@ -0,0 +1,26 @@
+--TEST--
+DOM\HTMLDocument loading with unicode codepoints resulting in an error
+--EXTENSIONS--
+dom
+--FILE--
+
+--EXPECTF--
+--- createFromFile ---
+
+Warning: DOM\HTMLDocument::createFromFile(): tokenizer error missing-end-tag-name in %s on line %d
+
+Warning: DOM\HTMLDocument::createFromFile(): tree error unexpected-token in %s on line %d
+
+Warning: DOM\HTMLDocument::createFromFile(): tree error unexpected-token in %s on line %d
+--- createFromString ---
+
+Warning: DOM\HTMLDocument::createFromString(): tokenizer error missing-end-tag-name in Entity, line: 7, column: 29 in %s on line %d
+
+Warning: DOM\HTMLDocument::createFromString(): tree error unexpected-token in Entity, line: 7, column: 14-17 in %s on line %d
+
+Warning: DOM\HTMLDocument::createFromString(): tree error unexpected-token in Entity, line: 8, column: 7-10 in %s on line %d
diff --git a/ext/dom/tests/HTML5/encoding/Document_fallback_encoding.phpt b/ext/dom/tests/modern/html/encoding/HTMLDocument_fallback_encoding.phpt
similarity index 68%
rename from ext/dom/tests/HTML5/encoding/Document_fallback_encoding.phpt
rename to ext/dom/tests/modern/html/encoding/HTMLDocument_fallback_encoding.phpt
index f3ec81d8f25f1..9ffd02dc5d7d8 100644
--- a/ext/dom/tests/HTML5/encoding/Document_fallback_encoding.phpt
+++ b/ext/dom/tests/modern/html/encoding/HTMLDocument_fallback_encoding.phpt
@@ -1,12 +1,11 @@
--TEST--
-DOM\HTML5Document fallback encoding test
+DOM\HTMLDocument fallback encoding test
--EXTENSIONS--
dom
--FILE--
loadHTMLFile(__DIR__ . "/fallback_encoding.html");
+$dom = DOM\HTMLDocument::createFromFile(__DIR__ . "/fallback_encoding.html");
var_dump($dom->encoding);
echo $dom->saveHTML();
diff --git a/ext/dom/tests/HTML5/encoding/fallback_encoding.html b/ext/dom/tests/modern/html/encoding/fallback_encoding.html
similarity index 100%
rename from ext/dom/tests/HTML5/encoding/fallback_encoding.html
rename to ext/dom/tests/modern/html/encoding/fallback_encoding.html
diff --git a/ext/dom/tests/HTML5/encoding/gb18030.html b/ext/dom/tests/modern/html/encoding/gb18030.html
similarity index 100%
rename from ext/dom/tests/HTML5/encoding/gb18030.html
rename to ext/dom/tests/modern/html/encoding/gb18030.html
diff --git a/ext/dom/tests/HTML5/encoding/shift_jis.html b/ext/dom/tests/modern/html/encoding/shift_jis.html
similarity index 100%
rename from ext/dom/tests/HTML5/encoding/shift_jis.html
rename to ext/dom/tests/modern/html/encoding/shift_jis.html
diff --git a/ext/dom/tests/HTML5/encoding/utf16be_bom.html b/ext/dom/tests/modern/html/encoding/utf16be_bom.html
similarity index 100%
rename from ext/dom/tests/HTML5/encoding/utf16be_bom.html
rename to ext/dom/tests/modern/html/encoding/utf16be_bom.html
diff --git a/ext/dom/tests/HTML5/encoding/utf16le_bom.html b/ext/dom/tests/modern/html/encoding/utf16le_bom.html
similarity index 100%
rename from ext/dom/tests/HTML5/encoding/utf16le_bom.html
rename to ext/dom/tests/modern/html/encoding/utf16le_bom.html
diff --git a/ext/dom/tests/HTML5/encoding/utf16le_error.html b/ext/dom/tests/modern/html/encoding/utf16le_error.html
similarity index 100%
rename from ext/dom/tests/HTML5/encoding/utf16le_error.html
rename to ext/dom/tests/modern/html/encoding/utf16le_error.html
diff --git a/ext/dom/tests/HTML5/encoding/utf8_bom.html b/ext/dom/tests/modern/html/encoding/utf8_bom.html
similarity index 100%
rename from ext/dom/tests/HTML5/encoding/utf8_bom.html
rename to ext/dom/tests/modern/html/encoding/utf8_bom.html
diff --git a/ext/dom/tests/HTML5/encoding/windows1251.html b/ext/dom/tests/modern/html/encoding/windows1251.html
similarity index 100%
rename from ext/dom/tests/HTML5/encoding/windows1251.html
rename to ext/dom/tests/modern/html/encoding/windows1251.html
diff --git a/ext/dom/tests/HTML5/interactions/Document_adopt_DOMDocument.phpt b/ext/dom/tests/modern/html/interactions/HTMLDocument_adopt_DOMDocument.phpt
similarity index 82%
rename from ext/dom/tests/HTML5/interactions/Document_adopt_DOMDocument.phpt
rename to ext/dom/tests/modern/html/interactions/HTMLDocument_adopt_DOMDocument.phpt
index a0aa2ba771813..cff51dac28f6b 100644
--- a/ext/dom/tests/HTML5/interactions/Document_adopt_DOMDocument.phpt
+++ b/ext/dom/tests/modern/html/interactions/HTMLDocument_adopt_DOMDocument.phpt
@@ -1,5 +1,5 @@
--TEST--
-DOM\HTML5Document adopts a DOMDocument
+DOM\HTMLDocument adopts a DOMDocument
--EXTENSIONS--
dom
--FILE--
@@ -16,7 +16,7 @@ $dom->loadHTML(<<
HTML);
-$dom2 = new DOM\HTML5Document();
+$dom2 = DOM\HTMLDocument::createEmpty();
$dom2->appendChild($dom2->adoptNode($dom->documentElement));
echo $dom2->saveHTML();
diff --git a/ext/dom/tests/modern/html/interactions/HTMLDocument_clone.phpt b/ext/dom/tests/modern/html/interactions/HTMLDocument_clone.phpt
new file mode 100644
index 0000000000000..1a04dd7beae29
--- /dev/null
+++ b/ext/dom/tests/modern/html/interactions/HTMLDocument_clone.phpt
@@ -0,0 +1,23 @@
+--TEST--
+Cloning a DOM\HTMLDocument
+--EXTENSIONS--
+dom
+--FILE--
+foo");
+
+$dom2 = clone $dom;
+var_dump($dom2->firstChild->tagName);
+var_dump($dom2->firstChild->textContent);
+
+$element = $dom2->firstChild;
+unset($dom2);
+var_dump(get_class($element->ownerDocument));
+
+?>
+--EXPECTF--
+Warning: DOM\HTMLDocument::createFromString(): tree error unexpected-token-in-initial-mode in Entity, line: 1, column: 2 in %s on line %d
+string(4) "html"
+string(3) "foo"
+string(16) "DOM\HTMLDocument"
diff --git a/ext/dom/tests/HTML5/interactions/Document_registerNodeClass_02.phpt b/ext/dom/tests/modern/html/interactions/HTMLDocument_registerNodeClass_01.phpt
similarity index 56%
rename from ext/dom/tests/HTML5/interactions/Document_registerNodeClass_02.phpt
rename to ext/dom/tests/modern/html/interactions/HTMLDocument_registerNodeClass_01.phpt
index c4430b49f479a..e7f4fb630f542 100644
--- a/ext/dom/tests/HTML5/interactions/Document_registerNodeClass_02.phpt
+++ b/ext/dom/tests/modern/html/interactions/HTMLDocument_registerNodeClass_01.phpt
@@ -1,17 +1,17 @@
--TEST--
-DOM\HTML5Document::registerNodeClass 02
+DOM\HTMLDocument::registerNodeClass 01
--EXTENSIONS--
dom
--FILE--
registerNodeClass("DOM\\HTML5Document", "DOMDocument");
+$dom->registerNodeClass("DOM\\HTMLDocument", "DOMDocument");
?>
--EXPECTF--
-Fatal error: Uncaught Error: DOM\Document::registerNodeClass(): Argument #2 ($extendedClass) must be a class name derived from DOM\HTML5Document or null, DOMDocument given in %s:%d
+Fatal error: Uncaught Error: DOM\Document::registerNodeClass(): Argument #2 ($extendedClass) must be a class name derived from DOM\HTMLDocument or null, DOMDocument given in %s:%d
Stack trace:
-#0 %s(%d): DOM\Document->registerNodeClass('DOM\\HTML5Docume...', 'DOMDocument')
+#0 %s(%d): DOM\Document->registerNodeClass('DOM\\HTMLDocumen...', 'DOMDocument')
#1 {main}
thrown in %s on line %d
diff --git a/ext/dom/tests/modern/html/interactions/HTMLDocument_registerNodeClass_02.phpt b/ext/dom/tests/modern/html/interactions/HTMLDocument_registerNodeClass_02.phpt
new file mode 100644
index 0000000000000..774dceaa59780
--- /dev/null
+++ b/ext/dom/tests/modern/html/interactions/HTMLDocument_registerNodeClass_02.phpt
@@ -0,0 +1,36 @@
+--TEST--
+DOM\HTMLDocument::registerNodeClass 02
+--EXTENSIONS--
+dom
+--FILE--
+registerNodeClass("DOM\\Document", "Custom");
+} catch (ValueError $e) {
+ echo $e->getMessage(), "\n";
+}
+
+$element = $dom->appendChild($dom->createElement("foo"));
+unset($dom);
+
+var_dump(get_class($element->ownerDocument));
+
+// Should fail
+$element->ownerDocument->foo();
+
+?>
+--EXPECTF--
+DOM\Document::registerNodeClass(): Argument #1 ($baseClass) must be a non-abstract class
+string(11) "DOMDocument"
+
+Fatal error: Uncaught Error: Call to undefined method DOMDocument::foo() in %s:%d
+Stack trace:
+#0 {main}
+ thrown in %s on line %d
diff --git a/ext/dom/tests/HTML5/interactions/Document_should_retain_properties_and_owner_01.phpt b/ext/dom/tests/modern/html/interactions/HTMLDocument_should_retain_properties_and_owner_01.phpt
similarity index 66%
rename from ext/dom/tests/HTML5/interactions/Document_should_retain_properties_and_owner_01.phpt
rename to ext/dom/tests/modern/html/interactions/HTMLDocument_should_retain_properties_and_owner_01.phpt
index 0d8a36ff3778c..4a6e816313229 100644
--- a/ext/dom/tests/HTML5/interactions/Document_should_retain_properties_and_owner_01.phpt
+++ b/ext/dom/tests/modern/html/interactions/HTMLDocument_should_retain_properties_and_owner_01.phpt
@@ -1,13 +1,12 @@
--TEST--
-HTML5 document should retain properties and ownerDocument relation 01
+DOM\HTMLDocument should retain properties and ownerDocument relation 01
--EXTENSIONS--
dom
--FILE--
foo", LIBXML_NOERROR);
$dom->strictErrorChecking = false;
-$dom->loadHTML("foo
", LIBXML_NOERROR);
// Destroy reference to the DOM
$child = $dom->documentElement;
@@ -22,39 +21,11 @@ var_dump($dom->strictErrorChecking);
?>
--EXPECT--
-object(DOM\HTML5Document)#1 (40) {
+object(DOM\HTMLDocument)#1 (26) {
["encoding"]=>
string(5) "UTF-8"
- ["actualEncoding"]=>
- string(5) "UTF-8"
- ["xmlEncoding"]=>
- string(5) "UTF-8"
- ["standalone"]=>
- bool(true)
- ["xmlStandalone"]=>
- bool(true)
- ["version"]=>
- NULL
- ["xmlVersion"]=>
- NULL
- ["config"]=>
- NULL
- ["formatOutput"]=>
- bool(false)
- ["validateOnParse"]=>
- bool(false)
- ["resolveExternals"]=>
- bool(false)
- ["preserveWhiteSpace"]=>
- bool(true)
- ["recover"]=>
- bool(false)
- ["substituteEntities"]=>
- bool(false)
["doctype"]=>
NULL
- ["implementation"]=>
- string(22) "(object value omitted)"
["documentElement"]=>
string(22) "(object value omitted)"
["strictErrorChecking"]=>
diff --git a/ext/dom/tests/HTML5/interactions/Document_should_retain_properties_and_owner_02.phpt b/ext/dom/tests/modern/html/interactions/HTMLDocument_should_retain_properties_and_owner_02.phpt
similarity index 65%
rename from ext/dom/tests/HTML5/interactions/Document_should_retain_properties_and_owner_02.phpt
rename to ext/dom/tests/modern/html/interactions/HTMLDocument_should_retain_properties_and_owner_02.phpt
index 268f52b81c483..9c4d016fef81c 100644
--- a/ext/dom/tests/HTML5/interactions/Document_should_retain_properties_and_owner_02.phpt
+++ b/ext/dom/tests/modern/html/interactions/HTMLDocument_should_retain_properties_and_owner_02.phpt
@@ -1,11 +1,11 @@
--TEST--
-HTML5 document should retain properties and ownerDocument relation 02
+DOM\HTMLDocument should retain properties and ownerDocument relation 02
--EXTENSIONS--
dom
--FILE--
foo", LIBXML_NOERROR);
$dom->strictErrorChecking = false;
$child = $dom->appendChild($dom->createElement('html'));
@@ -13,7 +13,7 @@ $child = $dom->appendChild($dom->createElement('html'));
unset($dom);
// Regain reference using the ownerDocument property
-// Should be a DOM\HTML5Document
+// Should be a DOM\HTMLDocument
$dom = $child->ownerDocument;
var_dump($dom);
// Test if property is preserved (any random doc_props property will do)
@@ -21,39 +21,11 @@ var_dump($dom->strictErrorChecking);
?>
--EXPECT--
-object(DOM\HTML5Document)#1 (40) {
+object(DOM\HTMLDocument)#1 (26) {
["encoding"]=>
- NULL
- ["actualEncoding"]=>
- NULL
- ["xmlEncoding"]=>
- NULL
- ["standalone"]=>
- bool(false)
- ["xmlStandalone"]=>
- bool(false)
- ["version"]=>
- string(3) "1.0"
- ["xmlVersion"]=>
- string(3) "1.0"
- ["config"]=>
- NULL
- ["formatOutput"]=>
- bool(false)
- ["validateOnParse"]=>
- bool(false)
- ["resolveExternals"]=>
- bool(false)
- ["preserveWhiteSpace"]=>
- bool(true)
- ["recover"]=>
- bool(false)
- ["substituteEntities"]=>
- bool(false)
+ string(5) "UTF-8"
["doctype"]=>
NULL
- ["implementation"]=>
- string(22) "(object value omitted)"
["documentElement"]=>
string(22) "(object value omitted)"
["strictErrorChecking"]=>
@@ -65,13 +37,13 @@ object(DOM\HTML5Document)#1 (40) {
["lastElementChild"]=>
string(22) "(object value omitted)"
["childElementCount"]=>
- int(1)
+ int(2)
["nodeName"]=>
string(9) "#document"
["nodeValue"]=>
NULL
["nodeType"]=>
- int(9)
+ int(13)
["parentNode"]=>
NULL
["parentElement"]=>
@@ -101,6 +73,6 @@ object(DOM\HTML5Document)#1 (40) {
["baseURI"]=>
NULL
["textContent"]=>
- string(0) ""
+ string(3) "foo"
}
bool(false)
diff --git a/ext/dom/tests/HTML5/parser/Document_loadHTMLFile_DOM_HTML_NO_DEFAULT_NS copy.phpt b/ext/dom/tests/modern/html/parser/HTMLDocument_fromFile_DOM_HTML_NO_DEFAULT_NS.phpt
similarity index 64%
rename from ext/dom/tests/HTML5/parser/Document_loadHTMLFile_DOM_HTML_NO_DEFAULT_NS copy.phpt
rename to ext/dom/tests/modern/html/parser/HTMLDocument_fromFile_DOM_HTML_NO_DEFAULT_NS.phpt
index ce8a38c8b5230..55089d8fac345 100644
--- a/ext/dom/tests/HTML5/parser/Document_loadHTMLFile_DOM_HTML_NO_DEFAULT_NS copy.phpt
+++ b/ext/dom/tests/modern/html/parser/HTMLDocument_fromFile_DOM_HTML_NO_DEFAULT_NS.phpt
@@ -1,19 +1,17 @@
--TEST--
-Document::loadHTMLFile() with DOM\HTML_NO_DEFAULT_NS
+DOM\HTMLDocument::createFromFile() with DOM\HTML_NO_DEFAULT_NS
--EXTENSIONS--
dom
--FILE--
loadHTMLFile(__DIR__ . "/paragraph.html", LIBXML_HTML_NOIMPLIED | LIBXML_NOERROR);
+$dom = DOM\HTMLDocument::createFromFile(__DIR__ . "/paragraph.html", LIBXML_HTML_NOIMPLIED | LIBXML_NOERROR);
$xpath = new DOMXPath($dom);
$xpath->registerNamespace("x", "http://www.w3.org/1999/xhtml");
var_dump($xpath->query("//p"));
var_dump($xpath->query("//x:p"));
-$dom = new DOM\HTML5Document();
-$dom->loadHTMLFile(__DIR__ . "/paragraph.html", LIBXML_HTML_NOIMPLIED | LIBXML_NOERROR | DOM\HTML_NO_DEFAULT_NS);
+$dom = DOM\HTMLDocument::createFromFile(__DIR__ . "/paragraph.html", LIBXML_HTML_NOIMPLIED | LIBXML_NOERROR | DOM\HTML_NO_DEFAULT_NS);
$xpath = new DOMXPath($dom);
$xpath->registerNamespace("x", "http://www.w3.org/1999/xhtml");
var_dump($xpath->query("//p"));
diff --git a/ext/dom/tests/modern/html/parser/HTMLDocument_fromFile_empty_path.phpt b/ext/dom/tests/modern/html/parser/HTMLDocument_fromFile_empty_path.phpt
new file mode 100644
index 0000000000000..73e55c1f09aac
--- /dev/null
+++ b/ext/dom/tests/modern/html/parser/HTMLDocument_fromFile_empty_path.phpt
@@ -0,0 +1,16 @@
+--TEST--
+DOM\HTMLDocument::createFromFile() - empty path
+--EXTENSIONS--
+dom
+--FILE--
+
+--EXPECTF--
+Fatal error: Uncaught ValueError: Path cannot be empty in %s:%d
+Stack trace:
+#0 %s(%d): DOM\HTMLDocument::createFromFile('')
+#1 {main}
+ thrown in %s on line %d
diff --git a/ext/dom/tests/modern/html/parser/HTMLDocument_fromFile_local_existing_file.phpt b/ext/dom/tests/modern/html/parser/HTMLDocument_fromFile_local_existing_file.phpt
new file mode 100644
index 0000000000000..0e9e2b60e2bc7
--- /dev/null
+++ b/ext/dom/tests/modern/html/parser/HTMLDocument_fromFile_local_existing_file.phpt
@@ -0,0 +1,22 @@
+--TEST--
+DOM\HTMLDocument::createFromFile() - local existing file
+--EXTENSIONS--
+dom
+--FILE--
+saveHTML(), "\n";
+
+?>
+--EXPECTF--
+Warning: DOM\HTMLDocument::createFromFile(): tree error unexpected-token-in-initial-mode in %s on line %d
+
+Hello world
+
+
+This is a not well-formed
+html files with undeclared entities
+
+
+
diff --git a/ext/dom/tests/modern/html/parser/HTMLDocument_fromFile_local_file_does_not_exist.phpt b/ext/dom/tests/modern/html/parser/HTMLDocument_fromFile_local_file_does_not_exist.phpt
new file mode 100644
index 0000000000000..8bb91cf8ee7b6
--- /dev/null
+++ b/ext/dom/tests/modern/html/parser/HTMLDocument_fromFile_local_file_does_not_exist.phpt
@@ -0,0 +1,19 @@
+--TEST--
+DOM\HTMLDocument::createFromFile() - local file that does not exist
+--EXTENSIONS--
+dom
+--FILE--
+saveHTML(), "\n";
+
+?>
+--EXPECTF--
+Warning: DOM\HTMLDocument::createFromFile(%s): Failed to open stream: No such file or directory in %s on line %d
+
+Fatal error: Uncaught Exception: Cannot open file '%s' in %s:%d
+Stack trace:
+#0 %s(%d): DOM\HTMLDocument::createFromFile('%s')
+#1 {main}
+ thrown in %s on line %d
diff --git a/ext/dom/tests/modern/html/parser/HTMLDocument_fromFile_nul_terminator_cases_path.phpt b/ext/dom/tests/modern/html/parser/HTMLDocument_fromFile_nul_terminator_cases_path.phpt
new file mode 100644
index 0000000000000..6e79c2f12a941
--- /dev/null
+++ b/ext/dom/tests/modern/html/parser/HTMLDocument_fromFile_nul_terminator_cases_path.phpt
@@ -0,0 +1,22 @@
+--TEST--
+DOM\HTMLDocument::createFromFile() - NUL terminator cases path
+--EXTENSIONS--
+dom
+--FILE--
+getMessage(), "\n";
+}
+try {
+ DOM\HTMLDocument::createFromFile('%00');
+} catch (ValueError $e) {
+ echo $e->getMessage(), "\n";
+}
+
+?>
+--EXPECT--
+DOM\HTMLDocument::createFromFile(): Argument #1 ($path) must not contain any null bytes
+DOM\HTMLDocument::createFromFile(): Argument #1 ($path) must not contain percent-encoded NUL bytes
diff --git a/ext/dom/tests/HTML5/parser/Document_loadHTMLFile_parser_warning_01.phpt b/ext/dom/tests/modern/html/parser/HTMLDocument_fromFile_parser_warning_01.phpt
similarity index 65%
rename from ext/dom/tests/HTML5/parser/Document_loadHTMLFile_parser_warning_01.phpt
rename to ext/dom/tests/modern/html/parser/HTMLDocument_fromFile_parser_warning_01.phpt
index 8b95d43205677..9c77ebeadd4d0 100644
--- a/ext/dom/tests/HTML5/parser/Document_loadHTMLFile_parser_warning_01.phpt
+++ b/ext/dom/tests/modern/html/parser/HTMLDocument_fromFile_parser_warning_01.phpt
@@ -1,12 +1,11 @@
--TEST--
-DOM\HTML5Document::loadHTMLFile() - parser warning 01
+DOM\HTMLDocument::createFromFile() - parser warning 01
--EXTENSIONS--
dom
--FILE--
loadHTMLFile(__DIR__."/parser_warning_01.html", LIBXML_NOERROR);
+$dom = DOM\HTMLDocument::createFromFile(__DIR__."/parser_warning_01.html", LIBXML_NOERROR);
echo $dom->saveHTML(), "\n";
?>
diff --git a/ext/dom/tests/HTML5/parser/Document_loadHTMLFile_parser_warning_02.phpt b/ext/dom/tests/modern/html/parser/HTMLDocument_fromFile_parser_warning_02.phpt
similarity index 65%
rename from ext/dom/tests/HTML5/parser/Document_loadHTMLFile_parser_warning_02.phpt
rename to ext/dom/tests/modern/html/parser/HTMLDocument_fromFile_parser_warning_02.phpt
index de2c9ce8b16b0..9d3dd944e7ba5 100644
--- a/ext/dom/tests/HTML5/parser/Document_loadHTMLFile_parser_warning_02.phpt
+++ b/ext/dom/tests/modern/html/parser/HTMLDocument_fromFile_parser_warning_02.phpt
@@ -1,12 +1,11 @@
--TEST--
-DOM\HTML5Document::loadHTMLFile() - parser warning 02
+DOM\HTMLDocument::createFromFile() - parser warning 02
--EXTENSIONS--
dom
--FILE--
loadHTMLFile(__DIR__."/parser_warning_02.html", LIBXML_NOERROR);
+$dom = DOM\HTMLDocument::createFromFile(__DIR__."/parser_warning_02.html", LIBXML_NOERROR);
echo $dom->saveHTML(), "\n";
?>
diff --git a/ext/dom/tests/HTML5/parser/Document_loadHTMLFile_parser_warning_03.phpt b/ext/dom/tests/modern/html/parser/HTMLDocument_fromFile_parser_warning_03.phpt
similarity index 51%
rename from ext/dom/tests/HTML5/parser/Document_loadHTMLFile_parser_warning_03.phpt
rename to ext/dom/tests/modern/html/parser/HTMLDocument_fromFile_parser_warning_03.phpt
index 7b58fee4ca484..f5cada6c18f22 100644
--- a/ext/dom/tests/HTML5/parser/Document_loadHTMLFile_parser_warning_03.phpt
+++ b/ext/dom/tests/modern/html/parser/HTMLDocument_fromFile_parser_warning_03.phpt
@@ -1,12 +1,11 @@
--TEST--
-DOM\HTML5Document::loadHTMLFile() - parser warning 03
+DOM\HTMLDocument::createFromFile() - parser warning 03
--EXTENSIONS--
dom
--FILE--
loadHTMLFile(__DIR__."/parser_warning_03.html", LIBXML_NOERROR);
+$dom = DOM\HTMLDocument::createFromFile(__DIR__."/parser_warning_03.html", LIBXML_NOERROR);
echo $dom->saveHTML(), "\n";
?>
diff --git a/ext/dom/tests/HTML5/parser/Document_loadHTMLFile_with_failing_stream_wrapper.phpt b/ext/dom/tests/modern/html/parser/HTMLDocument_fromFile_with_failing_stream_wrapper.phpt
similarity index 73%
rename from ext/dom/tests/HTML5/parser/Document_loadHTMLFile_with_failing_stream_wrapper.phpt
rename to ext/dom/tests/modern/html/parser/HTMLDocument_fromFile_with_failing_stream_wrapper.phpt
index 31bd37e18b23f..78e07b72bbef4 100644
--- a/ext/dom/tests/HTML5/parser/Document_loadHTMLFile_with_failing_stream_wrapper.phpt
+++ b/ext/dom/tests/modern/html/parser/HTMLDocument_fromFile_with_failing_stream_wrapper.phpt
@@ -1,5 +1,5 @@
--TEST--
-DOM\HTML5Document::loadHTMLFile() with failing stream wrapper
+DOM\HTMLDocument::createFromFile() with failing stream wrapper
--EXTENSIONS--
dom
--FILE--
@@ -32,20 +32,13 @@ class FailingWrapper {
stream_wrapper_register("fail", FailingWrapper::class, 0);
-$dom = new DOM\HTML5Document();
-
-try {
- $dom->loadHTMLFile("fail://x");
-} catch (Exception $e) {
- echo $e->getMessage(), "\n";
-}
-echo $dom->saveHTML(), "\n";
+DOM\HTMLDocument::createFromFile("fail://x");
?>
--EXPECTF--
Fatal error: Uncaught Error: fail in %s:%d
Stack trace:
#0 [internal function]: FailingWrapper->stream_read(8192)
-#1 %s(%d): DOM\HTML5Document->loadHTMLFile('fail://x')
+#1 %s(%d): DOM\HTMLDocument::createFromFile('fail://x')
#2 {main}
thrown in %s on line %d
diff --git a/ext/dom/tests/HTML5/parser/Document_loadHTMLFile_with_working_stream_wrapper.phpt b/ext/dom/tests/modern/html/parser/HTMLDocument_fromFile_with_working_stream_wrapper.phpt
similarity index 76%
rename from ext/dom/tests/HTML5/parser/Document_loadHTMLFile_with_working_stream_wrapper.phpt
rename to ext/dom/tests/modern/html/parser/HTMLDocument_fromFile_with_working_stream_wrapper.phpt
index d18ef6ad6a1d9..e2e24fdba2727 100644
--- a/ext/dom/tests/HTML5/parser/Document_loadHTMLFile_with_working_stream_wrapper.phpt
+++ b/ext/dom/tests/modern/html/parser/HTMLDocument_fromFile_with_working_stream_wrapper.phpt
@@ -1,5 +1,5 @@
--TEST--
-DOM\HTML5Document::loadHTMLFile() with working stream wrapper
+DOM\HTMLDocument::createFromFile() with working stream wrapper
--EXTENSIONS--
dom
--FILE--
@@ -32,18 +32,16 @@ class EchoUriWrapper {
stream_wrapper_register("euw", EchoUriWrapper::class, 0);
-$dom = new DOM\HTML5Document();
-
echo "--- Stream wrapper case ---\n";
-$dom->loadHTMLFile("euw://hello
");
+$dom = DOM\HTMLDocument::createFromFile("euw://hello
");
echo $dom->saveHTML(), "\n";
echo "--- Stream wrapper in two chunks case ---\n";
libxml_use_internal_errors(true);
// To properly test this, keep the 4096 in sync with document.c's input stream buffer size.
-$dom->loadHTMLFile("euw://" . str_repeat("\n", 4096-22) . "<>");
+$dom = DOM\HTMLDocument::createFromFile("euw://" . str_repeat("\n", 4096-22) . "<>");
echo $dom->saveHTML(), "\n";
foreach (libxml_get_errors() as $error) {
@@ -54,7 +52,7 @@ foreach (libxml_get_errors() as $error) {
--EXPECTF--
--- Stream wrapper case ---
-Warning: DOM\HTML5Document::loadHTMLFile(): tree error unexpected-token-in-initial-mode in euw://hello
, line: 1, column: 2 in %s on line %d
+Warning: DOM\HTMLDocument::createFromFile(): tree error unexpected-token-in-initial-mode in euw://hello
, line: 1, column: 2 in %s on line %d
hello
--- Stream wrapper in two chunks case ---
<>
diff --git a/ext/dom/tests/HTML5/parser/Document_loadHTML_DOM_HTML_NO_DEFAULT_NS.phpt b/ext/dom/tests/modern/html/parser/HTMLDocument_fromString_DOM_HTML_NO_DEFAULT_NS.phpt
similarity index 62%
rename from ext/dom/tests/HTML5/parser/Document_loadHTML_DOM_HTML_NO_DEFAULT_NS.phpt
rename to ext/dom/tests/modern/html/parser/HTMLDocument_fromString_DOM_HTML_NO_DEFAULT_NS.phpt
index fbb266afaa8a2..0e6839007fc8e 100644
--- a/ext/dom/tests/HTML5/parser/Document_loadHTML_DOM_HTML_NO_DEFAULT_NS.phpt
+++ b/ext/dom/tests/modern/html/parser/HTMLDocument_fromString_DOM_HTML_NO_DEFAULT_NS.phpt
@@ -1,19 +1,17 @@
--TEST--
-Document::loadHTML() with DOM\HTML_NO_DEFAULT_NS
+DOM\HTMLDocument::createFromString() with DOM\HTML_NO_DEFAULT_NS
--EXTENSIONS--
dom
--FILE--
loadHTML(file_get_contents(__DIR__ . "/paragraph.html"), LIBXML_HTML_NOIMPLIED | LIBXML_NOERROR);
+$dom = DOM\HTMLDocument::createFromString(file_get_contents(__DIR__ . "/paragraph.html"), LIBXML_HTML_NOIMPLIED | LIBXML_NOERROR);
$xpath = new DOMXPath($dom);
$xpath->registerNamespace("x", "http://www.w3.org/1999/xhtml");
var_dump($xpath->query("//p"));
var_dump($xpath->query("//x:p"));
-$dom = new DOM\HTML5Document();
-$dom->loadHTML(file_get_contents(__DIR__ . "/paragraph.html"), LIBXML_HTML_NOIMPLIED | LIBXML_NOERROR | DOM\HTML_NO_DEFAULT_NS);
+$dom = DOM\HTMLDocument::createFromString(file_get_contents(__DIR__ . "/paragraph.html"), LIBXML_HTML_NOIMPLIED | LIBXML_NOERROR | DOM\HTML_NO_DEFAULT_NS);
$xpath = new DOMXPath($dom);
$xpath->registerNamespace("x", "http://www.w3.org/1999/xhtml");
var_dump($xpath->query("//p"));
diff --git a/ext/dom/tests/HTML5/parser/Document_loadHTML_LIBXML_COMPACT.phpt b/ext/dom/tests/modern/html/parser/HTMLDocument_fromString_LIBXML_COMPACT.phpt
similarity index 81%
rename from ext/dom/tests/HTML5/parser/Document_loadHTML_LIBXML_COMPACT.phpt
rename to ext/dom/tests/modern/html/parser/HTMLDocument_fromString_LIBXML_COMPACT.phpt
index 06ae52ee862bb..28146903105ec 100644
--- a/ext/dom/tests/HTML5/parser/Document_loadHTML_LIBXML_COMPACT.phpt
+++ b/ext/dom/tests/modern/html/parser/HTMLDocument_fromString_LIBXML_COMPACT.phpt
@@ -1,12 +1,11 @@
--TEST--
-HTML5Document::loadHTML() with LIBXML_COMPACT
+DOM\HTMLDocument::createFromString() with LIBXML_COMPACT
--EXTENSIONS--
dom
--FILE--
loadHTML(<<
diff --git a/ext/dom/tests/HTML5/parser/Document_loadHTML_LIBXML_HTML_NOIMPLIED_namespace.phpt b/ext/dom/tests/modern/html/parser/HTMLDocument_fromString_LIBXML_HTML_NOIMPLIED_namespace.phpt
similarity index 60%
rename from ext/dom/tests/HTML5/parser/Document_loadHTML_LIBXML_HTML_NOIMPLIED_namespace.phpt
rename to ext/dom/tests/modern/html/parser/HTMLDocument_fromString_LIBXML_HTML_NOIMPLIED_namespace.phpt
index 745b8e98899b2..2ed7112f63884 100644
--- a/ext/dom/tests/HTML5/parser/Document_loadHTML_LIBXML_HTML_NOIMPLIED_namespace.phpt
+++ b/ext/dom/tests/modern/html/parser/HTMLDocument_fromString_LIBXML_HTML_NOIMPLIED_namespace.phpt
@@ -1,12 +1,11 @@
--TEST--
-DOM\HTML5Document::loadHTML() with LIBXML_HTML_NOIMPLIED namespace check
+DOM\HTMLDocument::createFromString() with LIBXML_HTML_NOIMPLIED namespace check
--EXTENSIONS--
dom
--FILE--
loadHTML("foo
", LIBXML_HTML_NOIMPLIED | LIBXML_NOERROR);
+$dom = DOM\HTMLDocument::createFromString("foo
", LIBXML_HTML_NOIMPLIED | LIBXML_NOERROR);
echo $dom->saveXML();
var_dump($dom->documentElement->namespaceURI);
diff --git a/ext/dom/tests/HTML5/parser/Document_loadHTML_empty.phpt b/ext/dom/tests/modern/html/parser/HTMLDocument_fromString_empty.phpt
similarity index 55%
rename from ext/dom/tests/HTML5/parser/Document_loadHTML_empty.phpt
rename to ext/dom/tests/modern/html/parser/HTMLDocument_fromString_empty.phpt
index 885b1b69d062f..898ae43fabb24 100644
--- a/ext/dom/tests/HTML5/parser/Document_loadHTML_empty.phpt
+++ b/ext/dom/tests/modern/html/parser/HTMLDocument_fromString_empty.phpt
@@ -1,13 +1,11 @@
--TEST--
-DOM\HTML5Document::loadHTML() - empty document
+DOM\HTMLDocument::createFromString() - empty document
--EXTENSIONS--
dom
--FILE--
loadHTML('');
+$dom = DOM\HTMLDocument::createFromString('');
echo $dom->saveHTML(), "\n";
?>
diff --git a/ext/dom/tests/HTML5/parser/Document_loadHTML_LIBXML_HTML_NOIMPLIED.phpt b/ext/dom/tests/modern/html/parser/HTMLDocument_fromString_fromFile_LIBXML_HTML_NOIMPLIED.phpt
similarity index 83%
rename from ext/dom/tests/HTML5/parser/Document_loadHTML_LIBXML_HTML_NOIMPLIED.phpt
rename to ext/dom/tests/modern/html/parser/HTMLDocument_fromString_fromFile_LIBXML_HTML_NOIMPLIED.phpt
index 6d11a41b727c4..3e26fc35b950f 100644
--- a/ext/dom/tests/HTML5/parser/Document_loadHTML_LIBXML_HTML_NOIMPLIED.phpt
+++ b/ext/dom/tests/modern/html/parser/HTMLDocument_fromString_fromFile_LIBXML_HTML_NOIMPLIED.phpt
@@ -1,26 +1,23 @@
--TEST--
-DOM\HTML5Document::loadHTML() with LIBXML_HTML_NOIMPLIED
+DOM\HTMLDocument::createFromString()/createFromFile() with LIBXML_HTML_NOIMPLIED
--EXTENSIONS--
dom
--FILE--
loadHTML($html, LIBXML_HTML_NOIMPLIED | LIBXML_NOERROR);
+ $dom = DOM\HTMLDocument::createFromString($html, LIBXML_HTML_NOIMPLIED | LIBXML_NOERROR);
$output = $dom->saveHTML();
echo $output, "\n";
// Also test the loadHTMLFile variation. We won't print out the result, just checking the result is the same.
- $temp = fopen(__DIR__."/DOM_HTML5Document_loadHTML_LIBXML_HTML_NOIMPLIED_input.tmp", "w");
+ $temp = fopen(__DIR__."/DOM_HTMLDocument_loadHTML_LIBXML_HTML_NOIMPLIED_input.tmp", "w");
fwrite($temp, $html);
fclose($temp);
- $dom->loadHTMLFile(__DIR__."/DOM_HTML5Document_loadHTML_LIBXML_HTML_NOIMPLIED_input.tmp", LIBXML_HTML_NOIMPLIED | LIBXML_NOERROR);
+ $dom = DOM\HTMLDocument::createFromFile(__DIR__."/DOM_HTMLDocument_loadHTML_LIBXML_HTML_NOIMPLIED_input.tmp", LIBXML_HTML_NOIMPLIED | LIBXML_NOERROR);
var_dump($output === $dom->saveHTML());
}
-$dom = new DOM\HTML5Document();
-
echo "--- Missing html, head, body ---\n";
test("");
test("foobarbaz");
@@ -44,7 +41,7 @@ test("foo");
?>
--CLEAN--
--EXPECT--
--- Missing html, head, body ---
diff --git a/ext/dom/tests/HTML5/parser/Document_loadHTML_line_column.phpt b/ext/dom/tests/modern/html/parser/HTMLDocument_fromString_line_column.phpt
similarity index 91%
rename from ext/dom/tests/HTML5/parser/Document_loadHTML_line_column.phpt
rename to ext/dom/tests/modern/html/parser/HTMLDocument_fromString_line_column.phpt
index 86c8b33ef2d78..ba47a715fb394 100644
--- a/ext/dom/tests/HTML5/parser/Document_loadHTML_line_column.phpt
+++ b/ext/dom/tests/modern/html/parser/HTMLDocument_fromString_line_column.phpt
@@ -1,13 +1,11 @@
--TEST--
-DOM\HTML5Document::loadHTML() - line and column test
+DOM\HTMLDocument::createFromString() - line and column test
--EXTENSIONS--
dom
--FILE--
loadHTML(<<
diff --git a/ext/dom/tests/HTML5/parser/Document_loadHTML_normal_no_error.phpt b/ext/dom/tests/modern/html/parser/HTMLDocument_fromString_normal_no_error.phpt
similarity index 86%
rename from ext/dom/tests/HTML5/parser/Document_loadHTML_normal_no_error.phpt
rename to ext/dom/tests/modern/html/parser/HTMLDocument_fromString_normal_no_error.phpt
index cc3fa386170d0..b4a778a68d3ea 100644
--- a/ext/dom/tests/HTML5/parser/Document_loadHTML_normal_no_error.phpt
+++ b/ext/dom/tests/modern/html/parser/HTMLDocument_fromString_normal_no_error.phpt
@@ -1,12 +1,10 @@
--TEST--
-DOM\HTML5Document::loadHTML() - normal document, no error
+DOM\HTMLDocument::createFromString() - normal document, no error
--EXTENSIONS--
dom
--FILE--
@@ -23,7 +21,7 @@ $html = <<
HTML;
-$dom->loadHTML($html);
+$dom = DOM\HTMLDocument::createFromString($html);
echo $dom->saveHTML(), "\n";
?>
diff --git a/ext/dom/tests/HTML5/parser/Document_loadHTML_old_dtd.phpt b/ext/dom/tests/modern/html/parser/HTMLDocument_fromString_old_dtd.phpt
similarity index 73%
rename from ext/dom/tests/HTML5/parser/Document_loadHTML_old_dtd.phpt
rename to ext/dom/tests/modern/html/parser/HTMLDocument_fromString_old_dtd.phpt
index 210ed3bfc32d3..f140b8b1f349b 100644
--- a/ext/dom/tests/HTML5/parser/Document_loadHTML_old_dtd.phpt
+++ b/ext/dom/tests/modern/html/parser/HTMLDocument_fromString_old_dtd.phpt
@@ -1,12 +1,11 @@
--TEST--
-HTML5Document::loadHTML(): Old DTD
+DOM\HTMLDocument::createFromString(): Old DTD
--EXTENSIONS--
dom
--FILE--
loadHTML(<<
@@ -23,7 +22,7 @@ echo $dom->saveXML();
?>
--EXPECTF--
-Warning: DOM\HTML5Document::loadHTML(): tree error bad-doctype-token-in-initial-mode in Entity, line: 1, column: 3-9 in %s on line %d
+Warning: DOM\HTMLDocument::createFromString(): tree error bad-doctype-token-in-initial-mode in Entity, line: 1, column: 3-9 in %s on line %d
--- HTML serialization ---
diff --git a/ext/dom/tests/modern/html/parser/HTMLDocument_fromString_parser_warning_01.phpt b/ext/dom/tests/modern/html/parser/HTMLDocument_fromString_parser_warning_01.phpt
new file mode 100644
index 0000000000000..4469ee890ee7d
--- /dev/null
+++ b/ext/dom/tests/modern/html/parser/HTMLDocument_fromString_parser_warning_01.phpt
@@ -0,0 +1,23 @@
+--TEST--
+DOM\HTMLDocument::createFromString() - parser warning 01
+--EXTENSIONS--
+dom
+--FILE--
+saveHTML(), "\n";
+
+?>
+--EXPECTF--
+Warning: DOM\HTMLDocument::createFromString(): tokenizer error missing-end-tag-name in Entity, line: 7, column: 11 in %s on line %d
+
+Warning: DOM\HTMLDocument::createFromString(): tree error unexpected-token-in-initial-mode in Entity, line: 1, column: 2-6 in %s on line %d
+foo
+
+
+
+
+
+error
diff --git a/ext/dom/tests/modern/html/parser/HTMLDocument_fromString_parser_warning_02.phpt b/ext/dom/tests/modern/html/parser/HTMLDocument_fromString_parser_warning_02.phpt
new file mode 100644
index 0000000000000..08abc826af8df
--- /dev/null
+++ b/ext/dom/tests/modern/html/parser/HTMLDocument_fromString_parser_warning_02.phpt
@@ -0,0 +1,32 @@
+--TEST--
+DOM\HTMLDocument::createFromString() - parser warning 02
+--EXTENSIONS--
+dom
+--FILE--
+saveHTML(), "\n";
+
+?>
+--EXPECTF--
+Warning: DOM\HTMLDocument::createFromString(): tokenizer error unexpected-null-character in Entity, line: 4, column: 11 in %s on line %d
+
+Warning: DOM\HTMLDocument::createFromString(): tokenizer error missing-whitespace-between-attributes in Entity, line: 5, column: 20 in %s on line %d
+
+Warning: DOM\HTMLDocument::createFromString(): tokenizer error incorrectly-opened-comment in Entity, line: 6, column: 11 in %s on line %d
+
+Warning: DOM\HTMLDocument::createFromString(): tokenizer error nested-comment in Entity, line: 7, column: 18 in %s on line %d
+
+Warning: DOM\HTMLDocument::createFromString(): tree error unexpected-closed-token in Entity, line: 4, column: 18 in %s on line %d
+
+Warning: DOM\HTMLDocument::createFromString(): tree error doctype-token-in-body-mode in Entity, line: 8, column: 11-17 in %s on line %d
+
+ foo
+
+
+ -->
+
+
+
diff --git a/ext/dom/tests/HTML5/parser/Document_loadHTML_parser_warning_03.phpt b/ext/dom/tests/modern/html/parser/HTMLDocument_fromString_parser_warning_03.phpt
similarity index 64%
rename from ext/dom/tests/HTML5/parser/Document_loadHTML_parser_warning_03.phpt
rename to ext/dom/tests/modern/html/parser/HTMLDocument_fromString_parser_warning_03.phpt
index 12f5d810c9af8..2de4b36e2ef42 100644
--- a/ext/dom/tests/HTML5/parser/Document_loadHTML_parser_warning_03.phpt
+++ b/ext/dom/tests/modern/html/parser/HTMLDocument_fromString_parser_warning_03.phpt
@@ -1,13 +1,12 @@
--TEST--
-DOM\HTML5Document::loadHTML() - parser warning 03
+DOM\HTMLDocument::createFromString() - parser warning 03
--EXTENSIONS--
dom
--FILE--
loadHTML($html, LIBXML_NOERROR);
+$dom = DOM\HTMLDocument::createFromString($html, LIBXML_NOERROR);
echo $dom->saveHTML(), "\n";
?>
diff --git a/ext/dom/tests/HTML5/parser/Document_loadHTML_parser_warning_internal_error.phpt b/ext/dom/tests/modern/html/parser/HTMLDocument_fromString_parser_warning_internal_error.phpt
similarity index 83%
rename from ext/dom/tests/HTML5/parser/Document_loadHTML_parser_warning_internal_error.phpt
rename to ext/dom/tests/modern/html/parser/HTMLDocument_fromString_parser_warning_internal_error.phpt
index 006ee453a3b73..43b8cc4905aae 100644
--- a/ext/dom/tests/HTML5/parser/Document_loadHTML_parser_warning_internal_error.phpt
+++ b/ext/dom/tests/modern/html/parser/HTMLDocument_fromString_parser_warning_internal_error.phpt
@@ -1,16 +1,14 @@
--TEST--
-DOM\HTML5Document::loadHTML() - parser warning via internal error
+DOM\HTMLDocument::createFromString() - parser warning via internal error
--EXTENSIONS--
dom
--FILE--
x> ';
-$dom->loadHTML($html);
+$dom = DOM\HTMLDocument::createFromString($html);
foreach (libxml_get_errors() as $error) {
var_dump($error->message, $error->line, $error->column);
}
diff --git a/ext/dom/tests/HTML5/parser/Document_loadHTML_without_body.phpt b/ext/dom/tests/modern/html/parser/HTMLDocument_fromString_without_body.phpt
similarity index 50%
rename from ext/dom/tests/HTML5/parser/Document_loadHTML_without_body.phpt
rename to ext/dom/tests/modern/html/parser/HTMLDocument_fromString_without_body.phpt
index 1858481d4b982..047c18640849f 100644
--- a/ext/dom/tests/HTML5/parser/Document_loadHTML_without_body.phpt
+++ b/ext/dom/tests/modern/html/parser/HTMLDocument_fromString_without_body.phpt
@@ -1,14 +1,11 @@
--TEST--
-DOM\HTML5Document::loadHTML() - document without body
+DOM\HTMLDocument::createFromString() - document without body
--EXTENSIONS--
dom
--FILE--
foo '
';
-$dom->loadHTML($html);
+$dom = DOM\HTMLDocument::createFromString('foo '
');
echo $dom->saveHTML(), "\n";
?>
diff --git a/ext/dom/tests/modern/html/parser/HTMLDocument_parse_options.phpt b/ext/dom/tests/modern/html/parser/HTMLDocument_parse_options.phpt
new file mode 100644
index 0000000000000..c1e281b5331a9
--- /dev/null
+++ b/ext/dom/tests/modern/html/parser/HTMLDocument_parse_options.phpt
@@ -0,0 +1,107 @@
+--TEST--
+DOM\HTMLDocument: loading $options check
+--EXTENSIONS--
+dom
+--FILE--
+getMessage(), "\n";
+ }
+ }
+}
+
+?>
+--EXPECTF--
+--- Method createFromString ---
+int(%d)
+DOM\HTMLDocument::createFromString(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
+int(4194304)
+DOM\HTMLDocument::createFromString(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
+int(524288)
+DOM\HTMLDocument::createFromString(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
+int(8)
+DOM\HTMLDocument::createFromString(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
+int(4)
+DOM\HTMLDocument::createFromString(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
+int(16)
+DOM\HTMLDocument::createFromString(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
+int(4)
+DOM\HTMLDocument::createFromString(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
+int(256)
+DOM\HTMLDocument::createFromString(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
+int(16384)
+DOM\HTMLDocument::createFromString(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
+int(4)
+DOM\HTMLDocument::createFromString(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
+int(2)
+DOM\HTMLDocument::createFromString(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
+int(1024)
+DOM\HTMLDocument::createFromString(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
+int(1)
+DOM\HTMLDocument::createFromString(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
+int(2048)
+DOM\HTMLDocument::createFromString(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
+int(64)
+DOM\HTMLDocument::createFromString(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
+int(128)
+DOM\HTMLDocument::createFromString(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
+--- Method createFromFile ---
+int(%d)
+DOM\HTMLDocument::createFromFile(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
+int(4194304)
+DOM\HTMLDocument::createFromFile(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
+int(524288)
+DOM\HTMLDocument::createFromFile(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
+int(8)
+DOM\HTMLDocument::createFromFile(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
+int(4)
+DOM\HTMLDocument::createFromFile(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
+int(16)
+DOM\HTMLDocument::createFromFile(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
+int(4)
+DOM\HTMLDocument::createFromFile(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
+int(256)
+DOM\HTMLDocument::createFromFile(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
+int(16384)
+DOM\HTMLDocument::createFromFile(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
+int(4)
+DOM\HTMLDocument::createFromFile(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
+int(2)
+DOM\HTMLDocument::createFromFile(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
+int(1024)
+DOM\HTMLDocument::createFromFile(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
+int(1)
+DOM\HTMLDocument::createFromFile(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
+int(2048)
+DOM\HTMLDocument::createFromFile(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
+int(64)
+DOM\HTMLDocument::createFromFile(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
+int(128)
+DOM\HTMLDocument::createFromFile(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\NO_DEFAULT_NS)
diff --git a/ext/dom/tests/HTML5/parser/paragraph.html b/ext/dom/tests/modern/html/parser/paragraph.html
similarity index 100%
rename from ext/dom/tests/HTML5/parser/paragraph.html
rename to ext/dom/tests/modern/html/parser/paragraph.html
diff --git a/ext/dom/tests/HTML5/parser/parser_warning_01.html b/ext/dom/tests/modern/html/parser/parser_warning_01.html
similarity index 100%
rename from ext/dom/tests/HTML5/parser/parser_warning_01.html
rename to ext/dom/tests/modern/html/parser/parser_warning_01.html
diff --git a/ext/dom/tests/HTML5/parser/parser_warning_02.html b/ext/dom/tests/modern/html/parser/parser_warning_02.html
similarity index 100%
rename from ext/dom/tests/HTML5/parser/parser_warning_02.html
rename to ext/dom/tests/modern/html/parser/parser_warning_02.html
diff --git a/ext/dom/tests/HTML5/parser/parser_warning_03.html b/ext/dom/tests/modern/html/parser/parser_warning_03.html
similarity index 100%
rename from ext/dom/tests/HTML5/parser/parser_warning_03.html
rename to ext/dom/tests/modern/html/parser/parser_warning_03.html
diff --git a/ext/dom/tests/HTML5/parser/predefined_namespaces.phpt b/ext/dom/tests/modern/html/parser/predefined_namespaces.phpt
similarity index 96%
rename from ext/dom/tests/HTML5/parser/predefined_namespaces.phpt
rename to ext/dom/tests/modern/html/parser/predefined_namespaces.phpt
index 927bf6a330939..39e8bf0131d09 100644
--- a/ext/dom/tests/HTML5/parser/predefined_namespaces.phpt
+++ b/ext/dom/tests/modern/html/parser/predefined_namespaces.phpt
@@ -1,12 +1,11 @@
--TEST--
-HTML5Document: Predefined namespaces
+HTMLDocument: Predefined namespaces
--EXTENSIONS--
dom
--FILE--
loadHTML(<<
diff --git a/ext/dom/tests/HTML5/serializer/Document_escape_attribute.phpt b/ext/dom/tests/modern/html/serializer/HTMLDocument_escape_attribute.phpt
similarity index 67%
rename from ext/dom/tests/HTML5/serializer/Document_escape_attribute.phpt
rename to ext/dom/tests/modern/html/serializer/HTMLDocument_escape_attribute.phpt
index 17ba3e912411a..d4a888bffe82e 100644
--- a/ext/dom/tests/HTML5/serializer/Document_escape_attribute.phpt
+++ b/ext/dom/tests/modern/html/serializer/HTMLDocument_escape_attribute.phpt
@@ -1,12 +1,11 @@
--TEST--
-DOM\HTML5Document serialization escape attribute
+DOM\HTMLDocument serialization escape attribute
--EXTENSIONS--
dom
--FILE--
loadHTML("
", LIBXML_NOERROR);
+$dom = DOM\HTMLDocument::createFromString("
", LIBXML_NOERROR);
$p = $dom->documentElement->firstChild->nextSibling->firstChild;
$p->setAttribute("foo", "\"'&");
echo $dom->saveHTML();
diff --git a/ext/dom/tests/HTML5/serializer/Document_escape_nbsp.phpt b/ext/dom/tests/modern/html/serializer/HTMLDocument_escape_nbsp.phpt
similarity index 50%
rename from ext/dom/tests/HTML5/serializer/Document_escape_nbsp.phpt
rename to ext/dom/tests/modern/html/serializer/HTMLDocument_escape_nbsp.phpt
index 5d9988fdcb81e..c7b11a7a4c74f 100644
--- a/ext/dom/tests/HTML5/serializer/Document_escape_nbsp.phpt
+++ b/ext/dom/tests/modern/html/serializer/HTMLDocument_escape_nbsp.phpt
@@ -1,12 +1,11 @@
--TEST--
-DOM\HTML5Document serialization escape nbsp
+DOM\HTMLDocument serialization escape nbsp
--EXTENSIONS--
dom
--FILE--
loadHTML("these must transform: \xc2\xa0\xc2\xa0 but these not: \xa0|\xc2...
", LIBXML_NOERROR);
+$dom = DOM\HTMLDocument::createFromString("these must transform: \xc2\xa0\xc2\xa0 but these not: \xa0|\xc2...
", LIBXML_NOERROR);
echo $dom->saveHTML();
?>
diff --git a/ext/dom/tests/HTML5/serializer/Document_serialize_attribute_ns.phpt b/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_attribute_ns.phpt
similarity index 88%
rename from ext/dom/tests/HTML5/serializer/Document_serialize_attribute_ns.phpt
rename to ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_attribute_ns.phpt
index 14616aedf01f7..9e8865f9971f8 100644
--- a/ext/dom/tests/HTML5/serializer/Document_serialize_attribute_ns.phpt
+++ b/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_attribute_ns.phpt
@@ -1,11 +1,11 @@
--TEST--
-DOM\HTML5Document serialization of an attribute in a namespace
+DOM\HTMLDocument serialization of an attribute in a namespace
--EXTENSIONS--
dom
--FILE--
appendChild($dom->createElement("root"));
$root->setAttributeNodeNS($dom->createAttributeNS("http://php.net", "x:foo"));
$root->setAttributeNodeNS($dom->createAttributeNS("http://www.w3.org/XML/1998/namespace", "y:id"));
diff --git a/ext/dom/tests/HTML5/serializer/Document_serialize_cdata.phpt b/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_cdata.phpt
similarity index 67%
rename from ext/dom/tests/HTML5/serializer/Document_serialize_cdata.phpt
rename to ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_cdata.phpt
index df40f919d9b9a..2951ff59e9d12 100644
--- a/ext/dom/tests/HTML5/serializer/Document_serialize_cdata.phpt
+++ b/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_cdata.phpt
@@ -1,11 +1,11 @@
--TEST--
-DOM\HTML5Document serialization of CData
+DOM\HTMLDocument serialization of CData
--EXTENSIONS--
dom
--FILE--
appendChild($dom->createCDATASection("foobaré\"<>-&"));
echo $dom->saveHTML();
diff --git a/ext/dom/tests/HTML5/serializer/Document_serialize_comment.phpt b/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_comment.phpt
similarity index 66%
rename from ext/dom/tests/HTML5/serializer/Document_serialize_comment.phpt
rename to ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_comment.phpt
index ee78dfaeec3c1..4abc382a3ae1f 100644
--- a/ext/dom/tests/HTML5/serializer/Document_serialize_comment.phpt
+++ b/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_comment.phpt
@@ -1,11 +1,11 @@
--TEST--
-DOM\HTML5Document serialization of comment
+DOM\HTMLDocument serialization of comment
--EXTENSIONS--
dom
--FILE--
appendChild($dom->createComment("foobaré\"<>-&"));
echo $dom->saveHTML();
diff --git a/ext/dom/tests/HTML5/serializer/Document_serialize_doctype.phpt b/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_doctype.phpt
similarity index 88%
rename from ext/dom/tests/HTML5/serializer/Document_serialize_doctype.phpt
rename to ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_doctype.phpt
index 6e991bbd79757..97e2547a3d60f 100644
--- a/ext/dom/tests/HTML5/serializer/Document_serialize_doctype.phpt
+++ b/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_doctype.phpt
@@ -1,12 +1,11 @@
--TEST--
-DOM\HTML5Document serialization of document type
+DOM\HTMLDocument serialization of document type
--EXTENSIONS--
dom
--FILE--
loadHTML(<<
diff --git a/ext/dom/tests/HTML5/serializer/Document_serialize_element_ns.phpt b/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_element_ns.phpt
similarity index 88%
rename from ext/dom/tests/HTML5/serializer/Document_serialize_element_ns.phpt
rename to ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_element_ns.phpt
index d5fdf386a91a0..03813a6252088 100644
--- a/ext/dom/tests/HTML5/serializer/Document_serialize_element_ns.phpt
+++ b/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_element_ns.phpt
@@ -1,11 +1,11 @@
--TEST--
-DOM\HTML5Document serialization of element in a namespace
+DOM\HTMLDocument serialization of element in a namespace
--EXTENSIONS--
dom
--FILE--
appendChild($dom->createElement("root"));
$root->append("\n");
diff --git a/ext/dom/tests/HTML5/serializer/Document_serialize_failing_stream.phpt b/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_failing_stream.phpt
similarity index 85%
rename from ext/dom/tests/HTML5/serializer/Document_serialize_failing_stream.phpt
rename to ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_failing_stream.phpt
index 807bd8c0e3e5e..7241c34f7754f 100644
--- a/ext/dom/tests/HTML5/serializer/Document_serialize_failing_stream.phpt
+++ b/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_failing_stream.phpt
@@ -1,5 +1,5 @@
--TEST--
-DOM\HTML5Document serialization with a failing stream
+DOM\HTMLDocument serialization with a failing stream
--EXTENSIONS--
dom
--FILE--
@@ -33,7 +33,7 @@ class FailingWrapper {
stream_wrapper_register("failing", "FailingWrapper");
-$dom = new DOM\HTML5Document();
+$dom = DOM\HTMLDocument::createEmpty();
$root = $dom->appendChild($dom->createElement("root"));
$dom->saveHTMLFile("failing://foo");
@@ -44,6 +44,6 @@ string(1) "<"
Fatal error: Uncaught Error: fail in %s:%d
Stack trace:
#0 [internal function]: FailingWrapper->stream_write('root')
-#1 %s(%d): DOM\HTML5Document->saveHTMLFile('failing://foo')
+#1 %s(%d): DOM\HTMLDocument->saveHTMLFile('failing://foo')
#2 {main}
thrown in %s on line %d
diff --git a/ext/dom/tests/HTML5/serializer/Document_serialize_fragment.phpt b/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_fragment.phpt
similarity index 81%
rename from ext/dom/tests/HTML5/serializer/Document_serialize_fragment.phpt
rename to ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_fragment.phpt
index 81360ab65dc63..69c47eca4ace9 100644
--- a/ext/dom/tests/HTML5/serializer/Document_serialize_fragment.phpt
+++ b/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_fragment.phpt
@@ -1,11 +1,11 @@
--TEST--
-DOM\HTML5Document serialization of document fragment
+DOM\HTMLDocument serialization of document fragment
--EXTENSIONS--
dom
--FILE--
createDocumentFragment();
$fragment->appendChild($dom->createElement("foo"));
$bar = $fragment->appendChild($dom->createElement("bar"));
diff --git a/ext/dom/tests/HTML5/serializer/Document_serialize_full_document.phpt b/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_full_document.phpt
similarity index 90%
rename from ext/dom/tests/HTML5/serializer/Document_serialize_full_document.phpt
rename to ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_full_document.phpt
index b9f5c973de614..28fb38024a7bc 100644
--- a/ext/dom/tests/HTML5/serializer/Document_serialize_full_document.phpt
+++ b/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_full_document.phpt
@@ -1,12 +1,11 @@
--TEST--
-DOM\HTML5Document serialization of full document
+DOM\HTMLDocument serialization of full document
--EXTENSIONS--
dom
--FILE--
loadHTML(<<
diff --git a/ext/dom/tests/HTML5/serializer/Document_serialize_ns_imported_01.phpt b/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_ns_imported_01.phpt
similarity index 73%
rename from ext/dom/tests/HTML5/serializer/Document_serialize_ns_imported_01.phpt
rename to ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_ns_imported_01.phpt
index e55cf7e0bed36..d76529945c2a7 100644
--- a/ext/dom/tests/HTML5/serializer/Document_serialize_ns_imported_01.phpt
+++ b/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_ns_imported_01.phpt
@@ -1,21 +1,18 @@
--TEST--
-Document serialization with an imported namespace node 01
+DOM\HTMLDocument serialization with an imported namespace node 01
--EXTENSIONS--
dom
--FILE--
loadXML(' ');
+$xml = DOM\XMLDocument::createFromString(' ');
$xml->documentElement->setAttributeNS("http://foo/", "foo:bar", "value");
$xml->documentElement->appendChild($xml->createElementNS('some:ns2', 'child'));
echo $xml->saveXML();
-echo $xml->saveHTML(), "\n";
echo "--- After import into HTML ---\n";
-$html = new DOM\HTML5Document();
-$html->loadHTML('foo
', LIBXML_NOERROR);
+$html = DOM\HTMLDocument::createFromString('foo
', LIBXML_NOERROR);
$p = $html->documentElement->firstChild->nextSibling->firstChild;
$p->appendChild($html->importNode($xml->documentElement, true));
@@ -27,7 +24,6 @@ echo $html->saveHTML(), "\n";
--EXPECT--
-
--- After import into HTML ---
foo
diff --git a/ext/dom/tests/HTML5/serializer/Document_serialize_ns_imported_02.phpt b/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_ns_imported_02.phpt
similarity index 69%
rename from ext/dom/tests/HTML5/serializer/Document_serialize_ns_imported_02.phpt
rename to ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_ns_imported_02.phpt
index 98844f4493d1c..ad9e6446719e0 100644
--- a/ext/dom/tests/HTML5/serializer/Document_serialize_ns_imported_02.phpt
+++ b/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_ns_imported_02.phpt
@@ -1,20 +1,17 @@
--TEST--
-Document serialization with an imported namespace node 02
+DOM\HTMLDocument serialization with an imported namespace node 02
--EXTENSIONS--
dom
--FILE--
loadXML(' ');
+$xml = DOM\XMLDocument::createFromString(' ');
$xml->documentElement->appendChild($xml->createElementNS('some:ns2', 'child'));
echo $xml->saveXML();
-echo $xml->saveHTML(), "\n";
echo "--- After import into HTML ---\n";
-$html = new DOM\HTML5Document();
-$html->loadHTML('foo
', LIBXML_NOERROR);
+$html = DOM\HTMLDocument::createFromString('foo
', LIBXML_NOERROR);
$p = $html->documentElement->firstChild->nextSibling->firstChild;
$p->appendChild($html->importNode($xml->documentElement, true));
@@ -26,7 +23,6 @@ echo $html->saveHTML(), "\n";
--EXPECT--
-
--- After import into HTML ---
foo
diff --git a/ext/dom/tests/HTML5/serializer/Document_serialize_ns_imported_04.phpt b/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_ns_imported_03.phpt
similarity index 66%
rename from ext/dom/tests/HTML5/serializer/Document_serialize_ns_imported_04.phpt
rename to ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_ns_imported_03.phpt
index 4905e5daa275d..4669d2b391caf 100644
--- a/ext/dom/tests/HTML5/serializer/Document_serialize_ns_imported_04.phpt
+++ b/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_ns_imported_03.phpt
@@ -1,20 +1,17 @@
--TEST--
-Document serialization with an imported namespace node 04
+DOM\HTMLDocument serialization with an imported namespace node 03
--EXTENSIONS--
dom
--FILE--
loadXML(' ');
+$xml = DOM\XMLDocument::createFromString(' ');
$xml->documentElement->appendChild($xml->createElementNS('some:ns2', 'child'));
echo $xml->saveXML();
-echo $xml->saveHTML(), "\n";
echo "--- After import into HTML ---\n";
-$html = new DOM\HTML5Document();
-$html->loadHTML('foo
', LIBXML_NOERROR);
+$html = DOM\HTMLDocument::createFromString('foo
', LIBXML_NOERROR);
$p = $html->documentElement->firstChild->nextSibling->firstChild;
$p->appendChild($html->importNode($xml->documentElement, false));
@@ -26,7 +23,6 @@ echo $html->saveHTML(), "\n";
--EXPECT--
-
--- After import into HTML ---
foo
diff --git a/ext/dom/tests/HTML5/serializer/Document_serialize_ns_imported_03.phpt b/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_ns_imported_04.phpt
similarity index 66%
rename from ext/dom/tests/HTML5/serializer/Document_serialize_ns_imported_03.phpt
rename to ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_ns_imported_04.phpt
index d0340fbe806e1..a5014119c3c33 100644
--- a/ext/dom/tests/HTML5/serializer/Document_serialize_ns_imported_03.phpt
+++ b/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_ns_imported_04.phpt
@@ -1,20 +1,17 @@
--TEST--
-Document serialization with an imported namespace node 03
+DOM\HTMLDocument serialization with an imported namespace node 04
--EXTENSIONS--
dom
--FILE--
loadXML(' ');
+$xml = DOM\XMLDocument::createFromString(' ');
$xml->documentElement->appendChild($xml->createElementNS('some:ns2', 'child'));
echo $xml->saveXML();
-echo $xml->saveHTML(), "\n";
echo "--- After import into HTML ---\n";
-$html = new DOM\HTML5Document();
-$html->loadHTML('foo
', LIBXML_NOERROR);
+$html = DOM\HTMLDocument::createFromString('foo
', LIBXML_NOERROR);
$p = $html->documentElement->firstChild->nextSibling->firstChild;
$p->appendChild($html->importNode($xml->documentElement, false));
@@ -26,7 +23,6 @@ echo $html->saveHTML(), "\n";
--EXPECT--
-
--- After import into HTML ---
foo
diff --git a/ext/dom/tests/HTML5/serializer/Document_serialize_ns_imported_05.phpt b/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_ns_imported_05.phpt
similarity index 69%
rename from ext/dom/tests/HTML5/serializer/Document_serialize_ns_imported_05.phpt
rename to ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_ns_imported_05.phpt
index 7c1f72f867b31..f0a9a594fde45 100644
--- a/ext/dom/tests/HTML5/serializer/Document_serialize_ns_imported_05.phpt
+++ b/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_ns_imported_05.phpt
@@ -1,20 +1,17 @@
--TEST--
-Document serialization with an imported namespace node 05
+DOM\HTMLDocument serialization with an imported namespace node 05
--EXTENSIONS--
dom
--FILE--
loadXML(' ');
+$xml = DOM\XMLDocument::createFromString(' ');
$xml->documentElement->appendChild($xml->createElementNS('some:ns2', 'child'));
echo $xml->saveXML();
-echo $xml->saveHTML(), "\n";
echo "--- After adoption into HTML ---\n";
-$html = new DOM\HTML5Document();
-$html->loadHTML('foo
', LIBXML_NOERROR);
+$html = DOM\HTMLDocument::createFromString('foo
', LIBXML_NOERROR);
$p = $html->documentElement->firstChild->nextSibling->firstChild;
$p->appendChild($html->adoptNode($xml->documentElement));
@@ -26,7 +23,6 @@ echo $html->saveHTML(), "\n";
--EXPECT--
-
--- After adoption into HTML ---
foo
diff --git a/ext/dom/tests/HTML5/serializer/Document_serialize_ns_imported_06.phpt b/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_ns_imported_06.phpt
similarity index 66%
rename from ext/dom/tests/HTML5/serializer/Document_serialize_ns_imported_06.phpt
rename to ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_ns_imported_06.phpt
index f67ecbb082b75..0f7a70f607f51 100644
--- a/ext/dom/tests/HTML5/serializer/Document_serialize_ns_imported_06.phpt
+++ b/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_ns_imported_06.phpt
@@ -1,20 +1,17 @@
--TEST--
-Document serialization with an imported namespace node 06
+DOM\HTMLDocument serialization with an imported namespace node 06
--EXTENSIONS--
dom
--FILE--
loadXML(' ');
+$xml = DOM\XMLDocument::createFromString(' ');
$xml->documentElement->firstChild->appendChild($xml->createElementNS('some:ns2', 'child'));
echo $xml->saveXML();
-echo $xml->saveHTML(), "\n";
echo "--- After clone + import into HTML ---\n";
-$html = new DOM\HTML5Document();
-$html->loadHTML('foo
', LIBXML_NOERROR);
+$html = DOM\HTMLDocument::createFromString('foo
', LIBXML_NOERROR);
$p = $html->documentElement->firstChild->nextSibling->firstChild;
$p->appendChild($html->adoptNode($xml->documentElement->firstChild->cloneNode(true)));
@@ -26,7 +23,6 @@ echo $html->saveHTML(), "\n";
--EXPECT--
-
--- After clone + import into HTML ---
foo
diff --git a/ext/dom/tests/HTML5/serializer/Document_serialize_processing_instruction.phpt b/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_processing_instruction.phpt
similarity index 74%
rename from ext/dom/tests/HTML5/serializer/Document_serialize_processing_instruction.phpt
rename to ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_processing_instruction.phpt
index 3d34acd403e7a..c45bef620faa0 100644
--- a/ext/dom/tests/HTML5/serializer/Document_serialize_processing_instruction.phpt
+++ b/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_processing_instruction.phpt
@@ -1,11 +1,11 @@
--TEST--
-DOM\HTML5Document serialization of processing instruction
+DOM\HTMLDocument serialization of processing instruction
--EXTENSIONS--
dom
--FILE--
in a processing instruction element but that breaks (as expected)
$dom->appendChild($dom->createProcessingInstruction("target", "foobaré\"&<\xc2\xa0"));
echo $dom->saveHTML();
diff --git a/ext/dom/tests/HTML5/serializer/Document_serialize_roots_test_empty.phpt b/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_roots_test_empty.phpt
similarity index 85%
rename from ext/dom/tests/HTML5/serializer/Document_serialize_roots_test_empty.phpt
rename to ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_roots_test_empty.phpt
index d1143a264fe7a..fd33e67d7e484 100644
--- a/ext/dom/tests/HTML5/serializer/Document_serialize_roots_test_empty.phpt
+++ b/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_roots_test_empty.phpt
@@ -1,11 +1,11 @@
--TEST--
-DOM\HTML5Document serialization of different roots resulting in an empty result
+DOM\HTMLDocument serialization of different roots resulting in an empty result
--EXTENSIONS--
dom
--FILE--
appendChild($dom->createComment("comment"));
$cdata = $dom->appendChild($dom->createCDATASection("cdata"));
$emptyElement = $dom->appendChild($dom->createElement("empty"));
diff --git a/ext/dom/tests/HTML5/serializer/Document_serialize_text_01.phpt b/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_text_01.phpt
similarity index 69%
rename from ext/dom/tests/HTML5/serializer/Document_serialize_text_01.phpt
rename to ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_text_01.phpt
index 6f1b0c1c98f7d..025bee90ced70 100644
--- a/ext/dom/tests/HTML5/serializer/Document_serialize_text_01.phpt
+++ b/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_text_01.phpt
@@ -1,12 +1,11 @@
--TEST--
-DOM\HTML5Document serialization escape text 01
+DOM\HTMLDocument serialization escape text 01
--EXTENSIONS--
dom
--FILE--
loadHTML("
", LIBXML_NOERROR);
+$dom = DOM\HTMLDocument::createFromString("
", LIBXML_NOERROR);
$p = $dom->documentElement->firstChild->nextSibling->firstChild;
$p->textContent = "this is &text! \"\"";
echo $dom->saveHTML();
diff --git a/ext/dom/tests/HTML5/serializer/Document_serialize_text_02.phpt b/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_text_02.phpt
similarity index 88%
rename from ext/dom/tests/HTML5/serializer/Document_serialize_text_02.phpt
rename to ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_text_02.phpt
index 5c9a31a12bf63..4b9ead2e723cd 100644
--- a/ext/dom/tests/HTML5/serializer/Document_serialize_text_02.phpt
+++ b/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_text_02.phpt
@@ -1,11 +1,11 @@
--TEST--
-DOM\HTML5Document serialization escape text 02
+DOM\HTMLDocument serialization escape text 02
--EXTENSIONS--
dom
--FILE--
appendChild($dom->createElement("body"));
foreach (["style", "script", "xmp", "iframe", "noembed", "noframes", "plaintext", "noscript"] as $tag) {
$tag = $body->appendChild($dom->createElement($tag));
diff --git a/ext/dom/tests/HTML5/serializer/Document_serialize_text_03.phpt b/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_text_03.phpt
similarity index 90%
rename from ext/dom/tests/HTML5/serializer/Document_serialize_text_03.phpt
rename to ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_text_03.phpt
index 5da3187652e05..d9c8b39095544 100644
--- a/ext/dom/tests/HTML5/serializer/Document_serialize_text_03.phpt
+++ b/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_text_03.phpt
@@ -1,11 +1,11 @@
--TEST--
-DOM\HTML5Document serialization escape text 03
+DOM\HTMLDocument serialization escape text 03
--EXTENSIONS--
dom
--FILE--
appendChild($dom->createElement("body"));
foreach (["style", "script", "xmp", "iframe", "noembed", "noframes", "plaintext", "noscript"] as $tag) {
$tag = $body->appendChild($dom->createElementNS("some:ns", $tag));
diff --git a/ext/dom/tests/HTML5/serializer/Document_serialize_void_elements.phpt b/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_void_elements.phpt
similarity index 95%
rename from ext/dom/tests/HTML5/serializer/Document_serialize_void_elements.phpt
rename to ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_void_elements.phpt
index b75940da5c2f3..c5ac9a6d71a7c 100644
--- a/ext/dom/tests/HTML5/serializer/Document_serialize_void_elements.phpt
+++ b/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_void_elements.phpt
@@ -1,11 +1,11 @@
--TEST--
-DOM\HTML5Document serialization of void elements
+DOM\HTMLDocument serialization of void elements
--EXTENSIONS--
dom
--FILE--
+--EXPECT--
+object(DOM\XMLDocument)#1 (37) {
+ ["encoding"]=>
+ string(5) "UTF-8"
+ ["xmlEncoding"]=>
+ string(5) "UTF-8"
+ ["standalone"]=>
+ bool(false)
+ ["xmlStandalone"]=>
+ bool(false)
+ ["version"]=>
+ string(3) "1.0"
+ ["xmlVersion"]=>
+ string(3) "1.0"
+ ["formatOutput"]=>
+ bool(false)
+ ["validateOnParse"]=>
+ bool(false)
+ ["resolveExternals"]=>
+ bool(false)
+ ["preserveWhiteSpace"]=>
+ bool(true)
+ ["recover"]=>
+ bool(false)
+ ["substituteEntities"]=>
+ bool(false)
+ ["doctype"]=>
+ NULL
+ ["documentElement"]=>
+ NULL
+ ["strictErrorChecking"]=>
+ bool(true)
+ ["documentURI"]=>
+ NULL
+ ["firstElementChild"]=>
+ NULL
+ ["lastElementChild"]=>
+ NULL
+ ["childElementCount"]=>
+ int(0)
+ ["nodeName"]=>
+ string(9) "#document"
+ ["nodeValue"]=>
+ NULL
+ ["nodeType"]=>
+ int(9)
+ ["parentNode"]=>
+ NULL
+ ["parentElement"]=>
+ NULL
+ ["childNodes"]=>
+ string(22) "(object value omitted)"
+ ["firstChild"]=>
+ NULL
+ ["lastChild"]=>
+ NULL
+ ["previousSibling"]=>
+ NULL
+ ["nextSibling"]=>
+ NULL
+ ["attributes"]=>
+ NULL
+ ["isConnected"]=>
+ bool(true)
+ ["ownerDocument"]=>
+ NULL
+ ["namespaceURI"]=>
+ NULL
+ ["prefix"]=>
+ string(0) ""
+ ["localName"]=>
+ NULL
+ ["baseURI"]=>
+ NULL
+ ["textContent"]=>
+ string(0) ""
+}
diff --git a/ext/dom/tests/modern/xml/XMLDocument_fromEmptyDocument_01.phpt b/ext/dom/tests/modern/xml/XMLDocument_fromEmptyDocument_01.phpt
new file mode 100644
index 0000000000000..cc8f3f159b086
--- /dev/null
+++ b/ext/dom/tests/modern/xml/XMLDocument_fromEmptyDocument_01.phpt
@@ -0,0 +1,16 @@
+--TEST--
+DOM\XMLDocument::createEmpty 01
+--EXTENSIONS--
+dom
+--FILE--
+
+--EXPECTF--
+Fatal error: Uncaught ValueError: DOM\XMLDocument::createEmpty(): Argument #2 ($encoding) is not a valid document encoding in %s:%d
+Stack trace:
+#0 %s(%d): DOM\XMLDocument::createEmpty('1.0', 'foo')
+#1 {main}
+ thrown in %s on line %d
diff --git a/ext/dom/tests/modern/xml/XMLDocument_fromEmptyDocument_02.phpt b/ext/dom/tests/modern/xml/XMLDocument_fromEmptyDocument_02.phpt
new file mode 100644
index 0000000000000..5f7a604bb12b2
--- /dev/null
+++ b/ext/dom/tests/modern/xml/XMLDocument_fromEmptyDocument_02.phpt
@@ -0,0 +1,88 @@
+--TEST--
+DOM\XMLDocument::createEmpty 02
+--EXTENSIONS--
+dom
+--FILE--
+
+--EXPECT--
+object(DOM\XMLDocument)#1 (37) {
+ ["encoding"]=>
+ string(5) "UTF-8"
+ ["xmlEncoding"]=>
+ string(5) "UTF-8"
+ ["standalone"]=>
+ bool(false)
+ ["xmlStandalone"]=>
+ bool(false)
+ ["version"]=>
+ string(3) "1.1"
+ ["xmlVersion"]=>
+ string(3) "1.1"
+ ["formatOutput"]=>
+ bool(false)
+ ["validateOnParse"]=>
+ bool(false)
+ ["resolveExternals"]=>
+ bool(false)
+ ["preserveWhiteSpace"]=>
+ bool(true)
+ ["recover"]=>
+ bool(false)
+ ["substituteEntities"]=>
+ bool(false)
+ ["doctype"]=>
+ NULL
+ ["documentElement"]=>
+ NULL
+ ["strictErrorChecking"]=>
+ bool(true)
+ ["documentURI"]=>
+ NULL
+ ["firstElementChild"]=>
+ NULL
+ ["lastElementChild"]=>
+ NULL
+ ["childElementCount"]=>
+ int(0)
+ ["nodeName"]=>
+ string(9) "#document"
+ ["nodeValue"]=>
+ NULL
+ ["nodeType"]=>
+ int(9)
+ ["parentNode"]=>
+ NULL
+ ["parentElement"]=>
+ NULL
+ ["childNodes"]=>
+ string(22) "(object value omitted)"
+ ["firstChild"]=>
+ NULL
+ ["lastChild"]=>
+ NULL
+ ["previousSibling"]=>
+ NULL
+ ["nextSibling"]=>
+ NULL
+ ["attributes"]=>
+ NULL
+ ["isConnected"]=>
+ bool(true)
+ ["ownerDocument"]=>
+ NULL
+ ["namespaceURI"]=>
+ NULL
+ ["prefix"]=>
+ string(0) ""
+ ["localName"]=>
+ NULL
+ ["baseURI"]=>
+ NULL
+ ["textContent"]=>
+ string(0) ""
+}
diff --git a/ext/dom/tests/modern/xml/XMLDocument_fromEmptyDocument_03.phpt b/ext/dom/tests/modern/xml/XMLDocument_fromEmptyDocument_03.phpt
new file mode 100644
index 0000000000000..590fdd976bd09
--- /dev/null
+++ b/ext/dom/tests/modern/xml/XMLDocument_fromEmptyDocument_03.phpt
@@ -0,0 +1,15 @@
+--TEST--
+DOM\XMLDocument::createEmpty 03
+--EXTENSIONS--
+dom
+--FILE--
+append("foo");
+echo $dom->saveXML();
+
+?>
+--EXPECT--
+
+foo
diff --git a/ext/dom/tests/modern/xml/XMLDocument_fromFile_01.phpt b/ext/dom/tests/modern/xml/XMLDocument_fromFile_01.phpt
new file mode 100644
index 0000000000000..5e4243795f7cc
--- /dev/null
+++ b/ext/dom/tests/modern/xml/XMLDocument_fromFile_01.phpt
@@ -0,0 +1,16 @@
+--TEST--
+DOM\XMLDocument::createFromFile 01
+--EXTENSIONS--
+dom
+--FILE--
+
+--EXPECTF--
+Fatal error: Uncaught ValueError: DOM\XMLDocument::createFromString(): Argument #1 ($source) must not be empty in %s:%d
+Stack trace:
+#0 %s(%d): DOM\XMLDocument::createFromString('')
+#1 {main}
+ thrown in %s on line %d
diff --git a/ext/dom/tests/modern/xml/XMLDocument_fromFile_02.phpt b/ext/dom/tests/modern/xml/XMLDocument_fromFile_02.phpt
new file mode 100644
index 0000000000000..2d593c124ab8d
--- /dev/null
+++ b/ext/dom/tests/modern/xml/XMLDocument_fromFile_02.phpt
@@ -0,0 +1,18 @@
+--TEST--
+DOM\XMLDocument::createFromFile 02
+--EXTENSIONS--
+dom
+--FILE--
+
+--EXPECTF--
+Warning: DOM\XMLDocument::createFromFile(): I/O warning : failed to load external entity "%s" in %s on line %d
+
+Fatal error: Uncaught Exception: Cannot open file '\0' in %s:%d
+Stack trace:
+#0 %s(%d): DOM\XMLDocument::createFromFile('\\0')
+#1 {main}
+ thrown in %s on line %d
diff --git a/ext/dom/tests/modern/xml/XMLDocument_fromFile_03.phpt b/ext/dom/tests/modern/xml/XMLDocument_fromFile_03.phpt
new file mode 100644
index 0000000000000..f129450b38c9a
--- /dev/null
+++ b/ext/dom/tests/modern/xml/XMLDocument_fromFile_03.phpt
@@ -0,0 +1,16 @@
+--TEST--
+DOM\XMLDocument::createFromFile 03
+--EXTENSIONS--
+dom
+--FILE--
+getMessage();
+}
+
+?>
+--EXPECT--
+DOM\XMLDocument::createFromFile(): Argument #1 ($path) must not contain percent-encoded NUL bytes
diff --git a/ext/dom/tests/modern/xml/XMLDocument_fromFile_04.phpt b/ext/dom/tests/modern/xml/XMLDocument_fromFile_04.phpt
new file mode 100644
index 0000000000000..5b6798286fdb3
--- /dev/null
+++ b/ext/dom/tests/modern/xml/XMLDocument_fromFile_04.phpt
@@ -0,0 +1,23 @@
+--TEST--
+DOM\XMLDocument::createFromFile 04
+--EXTENSIONS--
+dom
+--FILE--
+saveXML();
+
+?>
+--EXPECT--
+
+
+
+ The Grapes of Wrath
+ John Steinbeck
+
+
+ The Pearl
+ John Steinbeck
+
+
diff --git a/ext/dom/tests/modern/xml/XMLDocument_fromString_01.phpt b/ext/dom/tests/modern/xml/XMLDocument_fromString_01.phpt
new file mode 100644
index 0000000000000..f4b7771e852c5
--- /dev/null
+++ b/ext/dom/tests/modern/xml/XMLDocument_fromString_01.phpt
@@ -0,0 +1,16 @@
+--TEST--
+DOM\XMLDocument::createFromString 01
+--EXTENSIONS--
+dom
+--FILE--
+
+--EXPECTF--
+Fatal error: Uncaught ValueError: DOM\XMLDocument::createFromString(): Argument #1 ($source) must not be empty in %s:%d
+Stack trace:
+#0 %s(%d): DOM\XMLDocument::createFromString('')
+#1 {main}
+ thrown in %s on line %d
diff --git a/ext/dom/tests/modern/xml/XMLDocument_fromString_02.phpt b/ext/dom/tests/modern/xml/XMLDocument_fromString_02.phpt
new file mode 100644
index 0000000000000..a3a7ed4b549d2
--- /dev/null
+++ b/ext/dom/tests/modern/xml/XMLDocument_fromString_02.phpt
@@ -0,0 +1,15 @@
+--TEST--
+DOM\XMLDocument::createFromString 02
+--EXTENSIONS--
+dom
+--FILE--
+ ');
+var_dump($dom->saveXMLFile("php://stdout"));
+
+?>
+--EXPECT--
+
+
+int(35)
diff --git a/ext/dom/tests/modern/xml/XMLDocument_fromString_03.phpt b/ext/dom/tests/modern/xml/XMLDocument_fromString_03.phpt
new file mode 100644
index 0000000000000..e7305d1aacb78
--- /dev/null
+++ b/ext/dom/tests/modern/xml/XMLDocument_fromString_03.phpt
@@ -0,0 +1,24 @@
+--TEST--
+DOM\XMLDocument::createFromString 03
+--EXTENSIONS--
+dom
+--FILE--
+ ', -1);
+} catch (ValueError $e) {
+ echo $e->getMessage();
+}
+
+foreach ($flags as $flag) {
+ DOM\XMLDocument::createFromString(' ', $flag);
+}
+
+?>
+--EXPECT--
+DOM\XMLDocument::createFromString(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOENT, LIBXML_DTDLOAD, LIBXML_DTDATTR, LIBXML_DTDVALID, LIBXML_NOERROR, LIBXML_NOWARNING, LIBXML_NOBLANKS, LIBXML_XINCLUDE, LIBXML_NSCLEAN, LIBXML_NOCDATA, LIBXML_NONET, LIBXML_PEDANTIC, LIBXML_COMPACT, LIBXML_PARSEHUGE, LIBXML_BIGLINES)
diff --git a/ext/dom/tests/HTML5/interactions/Document_node_ownerDocument_for_XML.phpt b/ext/dom/tests/modern/xml/XMLDocument_node_ownerDocument_for_XML.phpt
similarity index 82%
rename from ext/dom/tests/HTML5/interactions/Document_node_ownerDocument_for_XML.phpt
rename to ext/dom/tests/modern/xml/XMLDocument_node_ownerDocument_for_XML.phpt
index 9f083c0ca42cc..d9d5f9c052de9 100644
--- a/ext/dom/tests/HTML5/interactions/Document_node_ownerDocument_for_XML.phpt
+++ b/ext/dom/tests/modern/xml/XMLDocument_node_ownerDocument_for_XML.phpt
@@ -1,12 +1,11 @@
--TEST--
-HTML5Document getting ownerDocument from a node in an XML document should yield a HTML5Document
+DOM\XMLDocument getting ownerDocument from a node in an XML document should yield a DOM\XMLDocument
--EXTENSIONS--
dom
--FILE--
loadXML(' ');
+$dom = DOM\XMLDocument::createFromString(' ');
$element = $dom->documentElement;
unset($dom);
@@ -14,11 +13,9 @@ var_dump($element->ownerDocument);
?>
--EXPECTF--
-object(DOM\HTML5Document)#1 (40) {
+object(DOM\XMLDocument)#1 (37) {
["encoding"]=>
NULL
- ["actualEncoding"]=>
- NULL
["xmlEncoding"]=>
NULL
["standalone"]=>
@@ -29,8 +26,6 @@ object(DOM\HTML5Document)#1 (40) {
string(3) "1.0"
["xmlVersion"]=>
string(3) "1.0"
- ["config"]=>
- NULL
["formatOutput"]=>
bool(false)
["validateOnParse"]=>
@@ -45,8 +40,6 @@ object(DOM\HTML5Document)#1 (40) {
bool(false)
["doctype"]=>
NULL
- ["implementation"]=>
- string(22) "(object value omitted)"
["documentElement"]=>
string(22) "(object value omitted)"
["strictErrorChecking"]=>
diff --git a/ext/dom/tests/modern/xml/XMLDocument_saveXML_node.phpt b/ext/dom/tests/modern/xml/XMLDocument_saveXML_node.phpt
new file mode 100644
index 0000000000000..73a48eb454dd1
--- /dev/null
+++ b/ext/dom/tests/modern/xml/XMLDocument_saveXML_node.phpt
@@ -0,0 +1,16 @@
+--TEST--
+DOM\XMLDocument::saveXML(File) node
+--EXTENSIONS--
+dom
+--FILE--
+appendChild($dom->createElement("root"));
+$child1 = $root->appendChild($dom->createElement("child1"));
+$child2 = $root->appendChild($dom->createElement("child2"));
+echo $dom->saveXML($child1);
+
+?>
+--EXPECT--
+
diff --git a/ext/dom/xml_document.c b/ext/dom/xml_document.c
new file mode 100644
index 0000000000000..1e2c0595c30b6
--- /dev/null
+++ b/ext/dom/xml_document.c
@@ -0,0 +1,194 @@
+/*
+ +----------------------------------------------------------------------+
+ | Copyright (c) The PHP Group |
+ +----------------------------------------------------------------------+
+ | This source file is subject to version 3.01 of the PHP license, |
+ | that is bundled with this package in the file LICENSE, and is |
+ | available through the world-wide-web at the following url: |
+ | https://www.php.net/license/3_01.txt |
+ | If you did not receive a copy of the PHP license and are unable to |
+ | obtain it through the world-wide-web, please send a note to |
+ | license@php.net so we can mail you a copy immediately. |
+ +----------------------------------------------------------------------+
+ | Authors: Niels Dossche |
+ +----------------------------------------------------------------------+
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "php.h"
+#if defined(HAVE_LIBXML) && defined(HAVE_DOM)
+#include "php_dom.h"
+#include "namespace_compat.h"
+
+/* Validates the parser option bitmask passed as argument #2 of the XMLDocument loaders.
+ * Returns true when every set bit belongs to the supported XML_PARSE_* flags below.
+ * Otherwise reports an argument value error for argument 2 and returns false. */
+static bool check_options_validity(zend_long options)
+{
+	const zend_long allowed_mask =
+		XML_PARSE_NOENT
+		| XML_PARSE_DTDLOAD
+		| XML_PARSE_DTDATTR
+		| XML_PARSE_DTDVALID
+		| XML_PARSE_NOERROR
+		| XML_PARSE_NOWARNING
+		| XML_PARSE_NOBLANKS
+		| XML_PARSE_XINCLUDE
+		| XML_PARSE_NSCLEAN
+		| XML_PARSE_NOCDATA
+		| XML_PARSE_NONET
+		| XML_PARSE_PEDANTIC
+		| XML_PARSE_COMPACT
+		| XML_PARSE_HUGE
+		| XML_PARSE_BIG_LINES;
+	const zend_long unsupported_bits = options & ~allowed_mask;
+
+	if (unsupported_bits == 0) {
+		return true;
+	}
+
+	zend_argument_value_error(2, "contains invalid flags (allowed flags: LIBXML_NOENT, LIBXML_DTDLOAD, LIBXML_DTDATTR, LIBXML_DTDVALID, LIBXML_NOERROR, LIBXML_NOWARNING, LIBXML_NOBLANKS, LIBXML_XINCLUDE, LIBXML_NSCLEAN, LIBXML_NOCDATA, LIBXML_NONET, LIBXML_PEDANTIC, LIBXML_COMPACT, LIBXML_PARSEHUGE, LIBXML_BIGLINES)");
+	return false;
+}
+
+/* Living spec never creates explicit namespace declaration nodes.
+ * They are only written upon serialization but never appear in the tree.
+ * So in principle we could just ignore them outright.
+ * However, step 10 in https://html.spec.whatwg.org/multipage/parsing.html#create-an-element-for-the-token
+ * requires us to have the declaration as an attribute available
+ *
+ * This function walks every node reachable from doc->children and passes the nsDef list
+ * of each element node to dom_ns_compat_mark_attribute_list().
+ * The walk is iterative (no recursion): it only follows the children / next / parent links. */
+static void dom_mark_namespaces_as_attributes_too(xmlDocPtr doc)
+{
+ xmlNodePtr node = doc->children;
+ while (node != NULL) {
+ if (node->type == XML_ELEMENT_NODE) {
+ /* Helper is defined elsewhere; presumably it flags each declaration so it is also visible as an attribute. */
+ dom_ns_compat_mark_attribute_list(node->nsDef);
+
+ /* Depth first: only element nodes are descended into. */
+ if (node->children) {
+ node = node->children;
+ continue;
+ }
+ }
+
+ if (node->next) {
+ node = node->next;
+ } else {
+ /* Go upwards, until we find a parent node with a next sibling, or until we hit the base. */
+ do {
+ node = node->parent;
+ /* No parent left: the whole tree has been visited. */
+ if (node == NULL) {
+ return;
+ }
+ } while (node->next == NULL);
+ node = node->next;
+ }
+ }
+}
+
+/* Walks the tree rooted at `copy` and the tree rooted at `original` in lockstep and, for every
+ * element node of the copy, calls dom_ns_compat_copy_attribute_list_mark() with the nsDef lists
+ * of the copied node and of the node at the same position in the original.
+ * Both trees are expected to have the same shape; this is only checked via ZEND_ASSERT.
+ * NOTE(review): the walk is not bounded by `copy` itself. If `copy` has a next sibling or a parent,
+ * the traversal continues into them, so this looks like it expects a detached copy - confirm against callers. */
+void dom_mark_namespaces_for_copy_based_on_copy(xmlNodePtr copy, const xmlNode *original)
+{
+ xmlNodePtr copy_current = copy;
+ const xmlNode *original_current = original;
+ while (copy_current != NULL) {
+ /* The original must still have a node wherever the copy has one. */
+ ZEND_ASSERT(original_current != NULL);
+
+ if (copy_current->type == XML_ELEMENT_NODE) {
+ dom_ns_compat_copy_attribute_list_mark(copy_current->nsDef, original_current->nsDef);
+
+ /* Depth first: descend in both trees at the same time. */
+ if (copy_current->children) {
+ copy_current = copy_current->children;
+ original_current = original_current->children;
+ continue;
+ }
+ }
+
+ if (copy_current->next) {
+ copy_current = copy_current->next;
+ original_current = original_current->next;
+ } else {
+ /* Go upwards, until we find a parent node with a next sibling, or until we hit the base. */
+ do {
+ copy_current = copy_current->parent;
+ /* The copy decides when the walk ends; the original is only advanced while the copy still has nodes. */
+ if (copy_current == NULL) {
+ return;
+ }
+ original_current = original_current->parent;
+ } while (copy_current->next == NULL);
+ copy_current = copy_current->next;
+ original_current = original_current->next;
+ }
+ }
+}
+
+/* DOM\XMLDocument::createEmpty([string $version [, string $encoding]])
+ * Creates a document without any nodes.
+ * - $version is handed to xmlNewDoc() as is (NULL when the argument is omitted).
+ * - $encoding defaults to "UTF-8" and must be an encoding libxml2 has a handler for,
+ *   otherwise an argument value error is raised for argument 2.
+ * A DOM INVALID_STATE_ERR is thrown when libxml2 cannot allocate the document or its encoding string. */
+PHP_METHOD(DOM_XMLDocument, createEmpty)
+{
+	const char *version = NULL;
+	size_t version_len;
+	const char *encoding = "UTF-8";
+	size_t encoding_len = strlen("UTF-8");
+	/* "p" instead of "s" for the encoding: it is used as a C string below, so an embedded
+	 * NUL byte would otherwise silently cut the name short instead of being rejected. */
+	if (zend_parse_parameters(ZEND_NUM_ARGS(), "|sp", &version, &version_len, &encoding, &encoding_len) == FAILURE) {
+		RETURN_THROWS();
+	}
+
+	/* Only the existence of a handler matters, so it is released right after the lookup. */
+	xmlCharEncodingHandlerPtr handler = xmlFindCharEncodingHandler(encoding);
+	if (handler == NULL) {
+		zend_argument_value_error(2, "is not a valid document encoding");
+		RETURN_THROWS();
+	}
+	xmlCharEncCloseFunc(handler);
+
+	xmlDocPtr lxml_doc = xmlNewDoc((const xmlChar *) version);
+	if (UNEXPECTED(lxml_doc == NULL)) {
+		goto oom;
+	}
+
+	lxml_doc->encoding = xmlStrdup((const xmlChar *) encoding);
+	if (UNEXPECTED(lxml_doc->encoding == NULL)) {
+		/* Do not hand out a document that lost its encoding; nothing owns it yet, so free it here. */
+		xmlFreeDoc(lxml_doc);
+		goto oom;
+	}
+
+	dom_object *intern = php_dom_instantiate_object_helper(return_value, dom_xml_document_class_entry, (xmlNodePtr) lxml_doc, NULL);
+	intern->document->is_modern_api_class = true;
+	return;
+
+oom:
+	php_dom_throw_error(INVALID_STATE_ERR, 1);
+	RETURN_THROWS();
+}
+
+/* Shared implementation of DOM\XMLDocument::createFromString() and ::createFromFile().
+ * `mode` is DOM_LOAD_STRING or DOM_LOAD_FILE. Argument #1 is the XML text or the path,
+ * the optional argument #2 is a bitmask of parser options (see check_options_validity()).
+ * On success return_value holds the new document object; on failure an exception is pending. */
+static void load_from_helper(INTERNAL_FUNCTION_PARAMETERS, int mode)
+{
+	zend_long parse_flags = 0;
+	const char *input;
+	size_t input_len;
+	if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|l", &input, &input_len, &parse_flags) == FAILURE) {
+		RETURN_THROWS();
+	}
+
+	if (input_len == 0) {
+		zend_argument_value_error(1, "must not be empty");
+		RETURN_THROWS();
+	}
+
+	/* The length is handed on to the parser, which works with int-sized lengths. */
+	if (ZEND_SIZE_T_INT_OVFL(input_len)) {
+		zend_argument_value_error(1, "must not exceed INT_MAX in length");
+		RETURN_THROWS();
+	}
+
+	const bool from_file = (mode == DOM_LOAD_FILE);
+
+	/* See php_libxml_streams_IO_open_wrapper(), apparently this caused issues in the past. */
+	if (from_file && strstr(input, "%00") != NULL) {
+		zend_argument_value_error(1, "must not contain percent-encoded NUL bytes");
+		RETURN_THROWS();
+	}
+
+	if (!check_options_validity(parse_flags)) {
+		RETURN_THROWS();
+	}
+
+	xmlDocPtr parsed_doc = dom_document_parser(NULL, mode, input, input_len, parse_flags);
+	if (UNEXPECTED(parsed_doc == NULL)) {
+		/* Keep an exception that is already pending; only raise a generic one when there is none. */
+		if (EG(exception)) {
+			RETURN_THROWS();
+		}
+		if (from_file) {
+			zend_throw_exception_ex(NULL, 0, "Cannot open file '%s'", input);
+		} else {
+			php_dom_throw_error(INVALID_STATE_ERR, 1);
+		}
+		RETURN_THROWS();
+	}
+
+	dom_object *intern = php_dom_instantiate_object_helper(return_value, dom_xml_document_class_entry, (xmlNodePtr) parsed_doc, NULL);
+	intern->document->is_modern_api_class = true;
+	dom_mark_namespaces_as_attributes_too(parsed_doc);
+}
+
+/* DOM\XMLDocument::createFromString(): forwards all arguments to load_from_helper() in DOM_LOAD_STRING mode. */
+PHP_METHOD(DOM_XMLDocument, createFromString)
+{
+ load_from_helper(INTERNAL_FUNCTION_PARAM_PASSTHRU, DOM_LOAD_STRING);
+}
+
+/* DOM\XMLDocument::createFromFile(): forwards all arguments to load_from_helper() in DOM_LOAD_FILE mode. */
+PHP_METHOD(DOM_XMLDocument, createFromFile)
+{
+ load_from_helper(INTERNAL_FUNCTION_PARAM_PASSTHRU, DOM_LOAD_FILE);
+}
+
+#endif /* HAVE_LIBXML && HAVE_DOM */
diff --git a/ext/dom/xpath.c b/ext/dom/xpath.c
index 8d0f4a0b7b4b5..7522ec3f1df86 100644
--- a/ext/dom/xpath.c
+++ b/ext/dom/xpath.c
@@ -215,7 +215,7 @@ PHP_METHOD(DOMXPath, __construct)
dom_xpath_object *intern;
xmlXPathContextPtr ctx, oldctx;
- if (zend_parse_parameters(ZEND_NUM_ARGS(), "O|b", &doc, dom_document_class_entry, &register_node_ns) == FAILURE) {
+ if (zend_parse_parameters(ZEND_NUM_ARGS(), "O|b", &doc, dom_abstract_base_document_class_entry, &register_node_ns) == FAILURE) {
RETURN_THROWS();
}
diff --git a/ext/libxml/libxml.c b/ext/libxml/libxml.c
index b55f57e2752d9..03fcbd6acf8af 100644
--- a/ext/libxml/libxml.c
+++ b/ext/libxml/libxml.c
@@ -1356,7 +1356,7 @@ PHP_LIBXML_API int php_libxml_increment_doc_ref(php_libxml_node_object *object,
object->document->refcount = ret_refcount;
object->document->doc_props = NULL;
object->document->cache_tag.modification_nr = 1; /* iterators start at 0, such that they will start in an uninitialised state */
- object->document->is_html5_class = false;
+ object->document->is_modern_api_class = false;
}
return ret_refcount;
diff --git a/ext/libxml/php_libxml.h b/ext/libxml/php_libxml.h
index 7ffb93274ba7d..fd8c76be0eb92 100644
--- a/ext/libxml/php_libxml.h
+++ b/ext/libxml/php_libxml.h
@@ -66,7 +66,7 @@ typedef struct _php_libxml_ref_obj {
libxml_doc_props *doc_props;
php_libxml_cache_tag cache_tag;
int refcount;
- bool is_html5_class;
+ bool is_modern_api_class;
} php_libxml_ref_obj;
typedef struct _php_libxml_node_ptr {
From 73dfdd2e6fe1e14f79e046b98042fbe276f1235c Mon Sep 17 00:00:00 2001
From: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
Date: Fri, 22 Sep 2023 09:02:22 +0200
Subject: [PATCH 11/53] Add libxml2 bug workaround
---
ext/dom/php_dom.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/ext/dom/php_dom.c b/ext/dom/php_dom.c
index 0701227c6e7ee..41dbff36d14be 100644
--- a/ext/dom/php_dom.c
+++ b/ext/dom/php_dom.c
@@ -498,6 +498,12 @@ static void dom_update_refcount_after_clone(dom_object *original, xmlNodePtr ori
php_libxml_increment_node_ptr((php_libxml_node_object *)clone, cloned_node, (void *)clone);
if (original->document != clone->document) {
dom_copy_doc_props(original->document, clone->document);
+ /* Workaround libxml2 bug, see https://gitlab.gnome.org/GNOME/libxml2/-/commit/07920b4381873187c02df53fa9b5d44aff3a7041 */
+#if LIBXML_VERSION < 20911
+ if (original_node->type == XML_HTML_DOCUMENT_NODE) {
+ cloned_node->type = XML_HTML_DOCUMENT_NODE;
+ }
+#endif
}
}
From 8181d9e807a1e41b143e968e6f45734c792c6f37 Mon Sep 17 00:00:00 2001
From: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
Date: Sat, 23 Sep 2023 19:40:41 +0200
Subject: [PATCH 12/53] Update tree error reporting
---
ext/dom/html_document.c | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/ext/dom/html_document.c b/ext/dom/html_document.c
index 4766d8884efaa..5878d3ab5a08c 100644
--- a/ext/dom/html_document.c
+++ b/ext/dom/html_document.c
@@ -45,6 +45,7 @@ typedef struct {
size_t current_input_length;
size_t current_total_offset;
dom_line_column_cache cache_tokenizer;
+ bool html_no_implied;
} dom_lexbor_libxml2_bridge_application_data;
typedef struct {
@@ -252,6 +253,12 @@ static void dom_lexbor_libxml2_bridge_tokenizer_error_reporter(void *application
static void dom_lexbor_libxml2_bridge_tree_error_reporter(void *application_data_voidptr, lxb_html_tree_error_t *error, size_t line, size_t column, size_t len)
{
dom_lexbor_libxml2_bridge_application_data *application_data = application_data_voidptr;
+
+ if (line == 1 && application_data->html_no_implied && error->id == LXB_HTML_RULES_ERROR_UNTOININMO) {
+ /* For no implied mode, we want to mimick libxml's behaviour of not reporting an error for a lacking doctype. */
+ return;
+ }
+
if (UNEXPECTED(len <= 1)) {
/* Possible with EOF, or single-character tokens, don't use a range in the error display in this case */
php_libxml_pretend_ctx_error_ex(line, column, "tree error %s in %s, line: %zu, column: %zu\n", dom_lexbor_tree_error_code_to_string(error->id), application_data->input_name, line, column);
@@ -549,6 +556,7 @@ PHP_METHOD(DOM_HTMLDocument, createFromString)
dom_lexbor_libxml2_bridge_application_data application_data;
application_data.input_name = "Entity";
application_data.current_total_offset = 0;
+ application_data.html_no_implied = options & HTML_PARSE_NOIMPLIED;
dom_reset_line_column_cache(&application_data.cache_tokenizer);
lexbor_libxml2_bridge_parse_context ctx;
lexbor_libxml2_bridge_parse_context_init(&ctx);
@@ -660,6 +668,7 @@ PHP_METHOD(DOM_HTMLDocument, createFromFile)
dom_lexbor_libxml2_bridge_application_data application_data;
application_data.input_name = filename;
application_data.current_total_offset = 0;
+ application_data.html_no_implied = options & HTML_PARSE_NOIMPLIED;
dom_reset_line_column_cache(&application_data.cache_tokenizer);
lexbor_libxml2_bridge_parse_context ctx;
lexbor_libxml2_bridge_parse_context_init(&ctx);
From 04bea28cbc7ae62dfc8c109549df7afd5ca5937e Mon Sep 17 00:00:00 2001
From: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
Date: Sun, 24 Sep 2023 19:59:03 +0200
Subject: [PATCH 13/53] Amends
---
ext/dom/html_document.c | 2 +-
.../encoding/HTMLDocument_encoding_field_test.phpt | 2 +-
...ment_fromString_LIBXML_HTML_NOIMPLIED_error.phpt | 13 +++++++++++++
3 files changed, 15 insertions(+), 2 deletions(-)
create mode 100644 ext/dom/tests/modern/html/parser/HTMLDocument_fromString_LIBXML_HTML_NOIMPLIED_error.phpt
diff --git a/ext/dom/html_document.c b/ext/dom/html_document.c
index 5878d3ab5a08c..9fd36cd0eeb6f 100644
--- a/ext/dom/html_document.c
+++ b/ext/dom/html_document.c
@@ -512,7 +512,7 @@ PHP_METHOD(DOM_HTMLDocument, createEmpty)
const lxb_encoding_data_t *encoding_data = lxb_encoding_data_by_name((const lxb_char_t *) encoding, encoding_len);
if (encoding_data == NULL) {
- zend_argument_value_error(1, "is not a valid document encoding");
+ zend_argument_value_error(1, "must be a valid document encoding");
RETURN_THROWS();
}
diff --git a/ext/dom/tests/modern/html/encoding/HTMLDocument_encoding_field_test.phpt b/ext/dom/tests/modern/html/encoding/HTMLDocument_encoding_field_test.phpt
index dd9251608af84..bab3532565b82 100644
--- a/ext/dom/tests/modern/html/encoding/HTMLDocument_encoding_field_test.phpt
+++ b/ext/dom/tests/modern/html/encoding/HTMLDocument_encoding_field_test.phpt
@@ -40,4 +40,4 @@ string(6) "EUC-KR"
string(12) "windows-1251"
Invalid document encoding
string(12) "windows-1251"
-DOM\HTMLDocument::createEmpty(): Argument #1 ($encoding) is not a valid document encoding
+DOM\HTMLDocument::createEmpty(): Argument #1 ($encoding) must be a valid document encoding
diff --git a/ext/dom/tests/modern/html/parser/HTMLDocument_fromString_LIBXML_HTML_NOIMPLIED_error.phpt b/ext/dom/tests/modern/html/parser/HTMLDocument_fromString_LIBXML_HTML_NOIMPLIED_error.phpt
new file mode 100644
index 0000000000000..26f322f7127e1
--- /dev/null
+++ b/ext/dom/tests/modern/html/parser/HTMLDocument_fromString_LIBXML_HTML_NOIMPLIED_error.phpt
@@ -0,0 +1,13 @@
+--TEST--
+DOM\HTMLDocument::createFromString() with LIBXML_HTML_NOIMPLIED - tree error should not happen
+--EXTENSIONS--
+dom
+--FILE--
+foo", LIBXML_HTML_NOIMPLIED);
+echo $dom->saveHTML();
+
+?>
+--EXPECT--
+foo
From 3d848e32d91a54195f3f4d1f2365a78fa05cb1bb Mon Sep 17 00:00:00 2001
From: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
Date: Tue, 26 Sep 2023 22:19:37 +0200
Subject: [PATCH 14/53] DOMException name
---
ext/dom/php_dom.stub.php | 2 +-
ext/dom/php_dom_arginfo.h | 4 ++--
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/ext/dom/php_dom.stub.php b/ext/dom/php_dom.stub.php
index 71d705ef94b68..95506befbab3d 100644
--- a/ext/dom/php_dom.stub.php
+++ b/ext/dom/php_dom.stub.php
@@ -797,7 +797,7 @@ public function validate(): bool {}
public function xinclude(int $options = 0): int|false {}
}
- /** @alias DOM\Exception */
+ /** @alias DOM\DOMException */
final class DOMException extends Exception
{
/**
diff --git a/ext/dom/php_dom_arginfo.h b/ext/dom/php_dom_arginfo.h
index 96e7149837380..95cb28e294f72 100644
--- a/ext/dom/php_dom_arginfo.h
+++ b/ext/dom/php_dom_arginfo.h
@@ -1,5 +1,5 @@
/* This is a generated file, edit the .stub.php file instead.
- * Stub hash: afcf0dfba2c9d3ae0334f129f5852229bdde8d5f */
+ * Stub hash: 8493d85f8aa611f9739be2d59568d98ea4506522 */
ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(arginfo_dom_import_simplexml, 0, 1, DOMElement, 0)
ZEND_ARG_TYPE_INFO(0, node, IS_OBJECT, 0)
@@ -1781,7 +1781,7 @@ static zend_class_entry *register_class_DOMException(zend_class_entry *class_ent
INIT_CLASS_ENTRY(ce, "DOMException", class_DOMException_methods);
class_entry = zend_register_internal_class_ex(&ce, class_entry_Exception);
class_entry->ce_flags |= ZEND_ACC_FINAL;
- zend_register_class_alias("DOM\\Exception", class_entry);
+ zend_register_class_alias("DOM\\DOMException", class_entry);
zval property_code_default_value;
ZVAL_LONG(&property_code_default_value, 0);
From b14e1a3829a307bc8ab4548596a99a0fab37eae8 Mon Sep 17 00:00:00 2001
From: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
Date: Thu, 28 Sep 2023 21:15:45 +0200
Subject: [PATCH 15/53] Add interaction test with getElementsByTagName(NS)
---
.../HTMLDocument_getElementsByTagName.phpt | 98 +++++++++++++++++++
1 file changed, 98 insertions(+)
create mode 100644 ext/dom/tests/modern/html/interactions/HTMLDocument_getElementsByTagName.phpt
diff --git a/ext/dom/tests/modern/html/interactions/HTMLDocument_getElementsByTagName.phpt b/ext/dom/tests/modern/html/interactions/HTMLDocument_getElementsByTagName.phpt
new file mode 100644
index 0000000000000..3af5c2d01eb72
--- /dev/null
+++ b/ext/dom/tests/modern/html/interactions/HTMLDocument_getElementsByTagName.phpt
@@ -0,0 +1,98 @@
+--TEST--
+Test DOM\HTMLDocument::getElementsByTagName(NS)
+--EXTENSIONS--
+dom
+--FILE--
+
+
+
+ Test
+
+
+ Test
+ Test
+
+
+
+
+
+
+
+
+HTML);
+
+echo "--- getElementsByTagName ---\n";
+
+var_dump($dom->getElementsByTagName("p")[0]?->nodeName);
+var_dump($dom->getElementsByTagName("math")[0]?->nodeName);
+var_dump($dom->getElementsByTagName("mtable")[0]?->nodeName);
+var_dump($dom->getElementsByTagName("svg")[0]?->nodeName);
+var_dump($dom->getElementsByTagName("circle")[0]?->nodeName);
+
+echo "--- getElementsByTagNameNS (*) ---\n";
+
+var_dump($dom->getElementsByTagNameNS("*", "p")[0]?->nodeName);
+var_dump($dom->getElementsByTagNameNS("*", "math")[0]?->nodeName);
+var_dump($dom->getElementsByTagNameNS("*", "mtable")[0]?->nodeName);
+var_dump($dom->getElementsByTagNameNS("*", "svg")[0]?->nodeName);
+var_dump($dom->getElementsByTagNameNS("*", "circle")[0]?->nodeName);
+
+echo "--- getElementsByTagNameNS (xhtml) ---\n";
+
+var_dump($dom->getElementsByTagNameNS("http://www.w3.org/1999/xhtml", "p")[0]?->nodeName);
+var_dump($dom->getElementsByTagNameNS("http://www.w3.org/1999/xhtml", "math")[0]?->nodeName);
+var_dump($dom->getElementsByTagNameNS("http://www.w3.org/1999/xhtml", "mtable")[0]?->nodeName);
+var_dump($dom->getElementsByTagNameNS("http://www.w3.org/1999/xhtml", "svg")[0]?->nodeName);
+var_dump($dom->getElementsByTagNameNS("http://www.w3.org/1999/xhtml", "circle")[0]?->nodeName);
+
+echo "--- getElementsByTagNameNS (svg) ---\n";
+
+var_dump($dom->getElementsByTagNameNS("http://www.w3.org/2000/svg", "p")[0]?->nodeName);
+var_dump($dom->getElementsByTagNameNS("http://www.w3.org/2000/svg", "math")[0]?->nodeName);
+var_dump($dom->getElementsByTagNameNS("http://www.w3.org/2000/svg", "mtable")[0]?->nodeName);
+var_dump($dom->getElementsByTagNameNS("http://www.w3.org/2000/svg", "svg")[0]?->nodeName);
+var_dump($dom->getElementsByTagNameNS("http://www.w3.org/2000/svg", "circle")[0]?->nodeName);
+
+echo "--- getElementsByTagNameNS (math) ---\n";
+
+var_dump($dom->getElementsByTagNameNS("http://www.w3.org/1998/Math/MathML", "p")[0]?->nodeName);
+var_dump($dom->getElementsByTagNameNS("http://www.w3.org/1998/Math/MathML", "math")[0]?->nodeName);
+var_dump($dom->getElementsByTagNameNS("http://www.w3.org/1998/Math/MathML", "mtable")[0]?->nodeName);
+var_dump($dom->getElementsByTagNameNS("http://www.w3.org/1998/Math/MathML", "svg")[0]?->nodeName);
+var_dump($dom->getElementsByTagNameNS("http://www.w3.org/1998/Math/MathML", "circle")[0]?->nodeName);
+
+?>
+--EXPECT--
+--- getElementsByTagName ---
+string(1) "p"
+string(4) "math"
+string(6) "mtable"
+string(3) "svg"
+string(6) "circle"
+--- getElementsByTagNameNS (*) ---
+string(1) "p"
+string(4) "math"
+string(6) "mtable"
+string(3) "svg"
+string(6) "circle"
+--- getElementsByTagNameNS (xhtml) ---
+string(1) "p"
+NULL
+NULL
+NULL
+NULL
+--- getElementsByTagNameNS (svg) ---
+NULL
+NULL
+NULL
+string(3) "svg"
+string(6) "circle"
+--- getElementsByTagNameNS (math) ---
+NULL
+string(4) "math"
+string(6) "mtable"
+NULL
+NULL
From a8a7e969789aa3d8eda0cf66fd00900d4be8ef57 Mon Sep 17 00:00:00 2001
From: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
Date: Thu, 28 Sep 2023 22:35:37 +0200
Subject: [PATCH 16/53] Wire up documentURI
---
ext/dom/html_document.c | 34 ++++++++++++--
.../HTMLDocument_documentURI.phpt | 47 +++++++++++++++++++
.../modern/html/interactions/test foo.html | 1 +
3 files changed, 79 insertions(+), 3 deletions(-)
create mode 100644 ext/dom/tests/modern/html/interactions/HTMLDocument_documentURI.phpt
create mode 100644 ext/dom/tests/modern/html/interactions/test foo.html
diff --git a/ext/dom/html_document.c b/ext/dom/html_document.c
index 9fd36cd0eeb6f..360e6792cd1e8 100644
--- a/ext/dom/html_document.c
+++ b/ext/dom/html_document.c
@@ -731,9 +731,6 @@ PHP_METHOD(DOM_HTMLDocument, createFromFile)
}
}
- php_stream_close(stream);
- stream = NULL;
-
if (!dom_parse_decode_encode_finish(&ctx, document, parser, &decoding_encoding_ctx, &tokenizer_error_offset, &tree_error_offset)) {
goto fail_oom;
}
@@ -749,6 +746,7 @@ PHP_METHOD(DOM_HTMLDocument, createFromFile)
if (UNEXPECTED(bridge_status != LEXBOR_LIBXML2_BRIDGE_STATUS_OK)) {
php_libxml_ctx_error(NULL, "%s in %s", dom_lexbor_libxml2_bridge_status_code_to_string(bridge_status), filename);
lxb_html_document_destroy(document);
+ php_stream_close(stream);
RETURN_FALSE;
}
lxb_html_document_destroy(document);
@@ -761,6 +759,36 @@ PHP_METHOD(DOM_HTMLDocument, createFromFile)
lxml_doc->encoding = xmlStrdup((const xmlChar *) "UTF-8");
}
+ if (stream->wrapper == &php_plain_files_wrapper) {
+ // TODO: do the same for XMLDocument?
+ xmlChar *converted = xmlPathToURI((const xmlChar *) filename);
+ if (UNEXPECTED(!converted)) {
+ goto fail_oom;
+ }
+ /* Check for "file:/"" instead of "file://" because of libxml2 quirk */
+ if (strncmp((const char *) converted, "file:/", sizeof("file:/") - 1) != 0) {
+ xmlChar *buffer = xmlStrdup((const xmlChar *) "file://");
+ if (UNEXPECTED(!buffer)) {
+ xmlFree(converted);
+ goto fail_oom;
+ }
+ xmlChar *new_buffer = xmlStrcat(buffer, converted);
+ if (UNEXPECTED(!new_buffer)) {
+ xmlFree(buffer);
+ xmlFree(converted);
+ goto fail_oom;
+ }
+ xmlFree(converted);
+ lxml_doc->URL = new_buffer;
+ } else {
+ lxml_doc->URL = converted;
+ }
+ } else {
+ lxml_doc->URL = xmlStrdup((const xmlChar *) filename);
+ }
+
+ php_stream_close(stream);
+
dom_object *intern = php_dom_instantiate_object_helper(return_value, dom_html_document_class_entry, (xmlNodePtr) lxml_doc, NULL);
intern->document->is_modern_api_class = true;
return;
diff --git a/ext/dom/tests/modern/html/interactions/HTMLDocument_documentURI.phpt b/ext/dom/tests/modern/html/interactions/HTMLDocument_documentURI.phpt
new file mode 100644
index 0000000000000..9430aac9da30a
--- /dev/null
+++ b/ext/dom/tests/modern/html/interactions/HTMLDocument_documentURI.phpt
@@ -0,0 +1,47 @@
+--TEST--
+DOM\HTMLDocument::documentURI
+--EXTENSIONS--
+dom
+--FILE--
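+<?php
+
+$dom = DOM\HTMLDocument::createFromFile(__DIR__ . "/test foo.html", LIBXML_NOERROR);
+var_dump($dom->documentURI);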
+
+$memory = fopen("php://memory", "w+");
+fwrite($memory, "foobar");
+rewind($memory);
+$dom = DOM\HTMLDocument::createFromFile("php://memory");
+var_dump($dom->documentURI);
+fclose($memory);
+
+class DummyWrapper {
+ public $context;
+
+ public function stream_open($path, $mode, $options, &$opened_path) {
+ return true;
+ }
+
+ public function stream_read($count) {
+ return "";
+ }
+
+ public function stream_eof() {
+ return true;
+ }
+
+ public function stream_close() {
+ return true;
+ }
+}
+
+stream_wrapper_register("dummy", DummyWrapper::class);
+
+$dom = DOM\HTMLDocument::createFromFile("dummy://foo/ bar");
+var_dump($dom->documentURI);
+
+?>
+--EXPECTF--
+string(%d) "file:/%stest%20foo.html"
+string(12) "php://memory"
+string(16) "dummy://foo/ bar"
diff --git a/ext/dom/tests/modern/html/interactions/test foo.html b/ext/dom/tests/modern/html/interactions/test foo.html
new file mode 100644
index 0000000000000..19102815663d2
--- /dev/null
+++ b/ext/dom/tests/modern/html/interactions/test foo.html
@@ -0,0 +1 @@
+foo
\ No newline at end of file
From dc13d175dc446ef2a7c899b21bae29a89ad44bb5 Mon Sep 17 00:00:00 2001
From: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
Date: Fri, 29 Sep 2023 00:34:25 +0200
Subject: [PATCH 17/53] Adjustment for namespace reconciliation revert
---
ext/dom/html_document.c | 8 ++++-
...tring_LIBXML_HTML_NOIMPLIED_namespace.phpt | 29 +++++++++++++++++++
2 files changed, 36 insertions(+), 1 deletion(-)
diff --git a/ext/dom/html_document.c b/ext/dom/html_document.c
index 360e6792cd1e8..a0607ea4dadb1 100644
--- a/ext/dom/html_document.c
+++ b/ext/dom/html_document.c
@@ -315,7 +315,13 @@ static void dom_post_process_html5_loading(xmlDocPtr lxml_doc, zend_long options
}
dom_place_remove_element_and_hoist_children((xmlNodePtr) lxml_doc, "html");
if (!(options & DOM_HTML_NO_DEFAULT_NS) && EXPECTED(lxml_doc->children != NULL)) {
- dom_reconcile_ns_list(lxml_doc, lxml_doc->children, lxml_doc->last);
+ xmlNodePtr node = lxml_doc->children;
+ while (node) {
+ /* Fine to use the DOM wrap reconciliation here because it's the "modern" world of DOM, and no user manipulation happened yet. */
+ xmlDOMWrapCtxt dummy_ctxt = {0};
+ xmlDOMWrapReconcileNamespaces(&dummy_ctxt, node, /* options */ 0);
+ node = node->next;
+ }
}
}
}
diff --git a/ext/dom/tests/modern/html/parser/HTMLDocument_fromString_LIBXML_HTML_NOIMPLIED_namespace.phpt b/ext/dom/tests/modern/html/parser/HTMLDocument_fromString_LIBXML_HTML_NOIMPLIED_namespace.phpt
index 2ed7112f63884..ea384bb7885f3 100644
--- a/ext/dom/tests/modern/html/parser/HTMLDocument_fromString_LIBXML_HTML_NOIMPLIED_namespace.phpt
+++ b/ext/dom/tests/modern/html/parser/HTMLDocument_fromString_LIBXML_HTML_NOIMPLIED_namespace.phpt
@@ -5,12 +5,41 @@ dom
--FILE--
saveXML();
+
+echo "--- Single element ---\n";
+
$dom = DOM\HTMLDocument::createFromString("foo
", LIBXML_HTML_NOIMPLIED | LIBXML_NOERROR);
echo $dom->saveXML();
var_dump($dom->documentElement->namespaceURI);
+var_dump($dom->documentElement->prefix);
+
+echo "--- Multiple elements ---\n";
+
+$dom = DOM\HTMLDocument::createFromString("foo
bar ", LIBXML_HTML_NOIMPLIED | LIBXML_NOERROR);
+echo $dom->saveXML();
+var_dump($dom->documentElement->namespaceURI);
+var_dump($dom->documentElement->prefix);
+var_dump($dom->documentElement->nextSibling->namespaceURI);
+var_dump($dom->documentElement->nextSibling->prefix);
?>
--EXPECT--
+--- No elements ---
+
+--- Single element ---
+
+foo
+string(28) "http://www.w3.org/1999/xhtml"
+string(0) ""
+--- Multiple elements ---
foo
+bar
+string(28) "http://www.w3.org/1999/xhtml"
+string(0) ""
string(28) "http://www.w3.org/1999/xhtml"
+string(0) ""
From 28f302b067dddf3efd4bb7f256893a5dbbd2d44b Mon Sep 17 00:00:00 2001
From: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
Date: Wed, 4 Oct 2023 20:42:11 +0200
Subject: [PATCH 18/53] Test with noscript
---
.../modern/html/interactions/noscript.phpt | 46 +++++++++++++++++++
1 file changed, 46 insertions(+)
create mode 100644 ext/dom/tests/modern/html/interactions/noscript.phpt
diff --git a/ext/dom/tests/modern/html/interactions/noscript.phpt b/ext/dom/tests/modern/html/interactions/noscript.phpt
new file mode 100644
index 0000000000000..839845e330f22
--- /dev/null
+++ b/ext/dom/tests/modern/html/interactions/noscript.phpt
@@ -0,0 +1,46 @@
+--TEST--
+noscript behaviour
+--EXTENSIONS--
+dom
+--FILE--
+hi
", DOM\HTML_NO_DEFAULT_NS);
+var_dump($dom->documentElement->textContent);
+echo $dom->saveHTML(), "\n";
+echo $dom->saveXML();
+
+echo "--- Modifying the text content: tag ---\n";
+
+$xpath = new DOMXPath($dom);
+$noscript = $xpath->query("//noscript")[0];
+$noscript->textContent = "bye
";
+echo $dom->saveHTML(), "\n";
+echo $dom->saveXML();
+
+echo "--- Modifying the text content: trick ---\n";
+
+$noscript->textContent = "";
+echo $dom->saveHTML(), "\n";
+echo $dom->saveXML();
+
+?>
+--EXPECT--
+--- Parsing ---
+string(2) "hi"
+hi
+
+
+ hi
+--- Modifying the text content: tag ---
+<p>bye</p>
+
+
+ <p>bye</p>
+--- Modifying the text content: trick ---
+<!-- </noscript> -->
+
+
+ <!-- </noscript> -->
From efc4369eed6d698ed24bb56f631f75cba94a8c07 Mon Sep 17 00:00:00 2001
From: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
Date: Wed, 4 Oct 2023 21:59:27 +0200
Subject: [PATCH 19/53] Implement override_encoding
---
ext/dom/document.c | 6 +-
ext/dom/html_document.c | 122 ++++++++++--------
ext/dom/php_dom.h | 2 +-
ext/dom/php_dom.stub.php | 8 +-
ext/dom/php_dom_arginfo.h | 6 +-
...ment_createFromFile_override_encoding.phpt | 31 +++++
...nt_createFromString_override_encoding.phpt | 31 +++++
.../encoding/gb18030_without_charset.html | 7 +
...File_createFromString_BOM_buffer_edge.phpt | 26 ++++
...ment_createFromFile_override_encoding.phpt | 28 ++++
...nt_createFromString_override_encoding.phpt | 28 ++++
ext/dom/tests/modern/xml/dummy.xml | 2 +
ext/dom/xml_document.c | 18 ++-
13 files changed, 251 insertions(+), 64 deletions(-)
create mode 100644 ext/dom/tests/modern/html/encoding/HTMLDocument_createFromFile_override_encoding.phpt
create mode 100644 ext/dom/tests/modern/html/encoding/HTMLDocument_createFromString_override_encoding.phpt
create mode 100644 ext/dom/tests/modern/html/encoding/gb18030_without_charset.html
create mode 100644 ext/dom/tests/modern/html/parser/HTMLDocument_createFromFile_createFromString_BOM_buffer_edge.phpt
create mode 100644 ext/dom/tests/modern/xml/XMLDocument_createFromFile_override_encoding.phpt
create mode 100644 ext/dom/tests/modern/xml/XMLDocument_createFromString_override_encoding.phpt
create mode 100644 ext/dom/tests/modern/xml/dummy.xml
diff --git a/ext/dom/document.c b/ext/dom/document.c
index 123598d1baff0..517fdd09810e4 100644
--- a/ext/dom/document.c
+++ b/ext/dom/document.c
@@ -1197,7 +1197,7 @@ const char *_dom_get_valid_file_path(const char *source, char *resolved_path, in
}
/* }}} */
-xmlDocPtr dom_document_parser(zval *id, int mode, const char *source, size_t source_len, size_t options) /* {{{ */
+xmlDocPtr dom_document_parser(zval *id, int mode, const char *source, size_t source_len, size_t options, xmlCharEncodingHandlerPtr encoding) /* {{{ */
{
xmlDocPtr ret;
xmlParserCtxtPtr ctxt = NULL;
@@ -1240,6 +1240,8 @@ xmlDocPtr dom_document_parser(zval *id, int mode, const char *source, size_t sou
return(NULL);
}
+ (void) xmlSwitchToEncoding(ctxt, encoding);
+
/* If loading from memory, we need to set the base directory for the document */
if (mode != DOM_LOAD_FILE) {
#ifdef HAVE_GETCWD
@@ -1377,7 +1379,7 @@ static void dom_parse_document(INTERNAL_FUNCTION_PARAMETERS, int mode)
RETURN_FALSE;
}
- xmlDocPtr newdoc = dom_document_parser(ZEND_THIS, mode, source, source_len, options);
+ xmlDocPtr newdoc = dom_document_parser(ZEND_THIS, mode, source, source_len, options, NULL);
php_dom_finish_loading_document(ZEND_THIS, return_value, newdoc);
}
diff --git a/ext/dom/html_document.c b/ext/dom/html_document.c
index a0607ea4dadb1..7e99d743b3ea0 100644
--- a/ext/dom/html_document.c
+++ b/ext/dom/html_document.c
@@ -353,7 +353,7 @@ static dom_character_encoding_data dom_determine_encoding(const char *source, si
lxb_html_encoding_t encoding;
lxb_status_t status = lxb_html_encoding_init(&encoding);
if (status != LXB_STATUS_OK) {
- goto fallback;
+ goto fallback_uninit;
}
/* This is the "wait either for 1024 bytes or 500ms" part */
if (source_len > 1024) {
@@ -368,32 +368,47 @@ static dom_character_encoding_data dom_determine_encoding(const char *source, si
goto fallback;
}
result.encoding_data = lxb_encoding_data_by_pre_name(entry->name, entry->end - entry->name);
+ if (!result.encoding_data) {
+ goto fallback;
+ }
result.bom_shift = 0;
lxb_html_encoding_destroy(&encoding, false);
return result;
fallback:
+ lxb_html_encoding_destroy(&encoding, false);
+fallback_uninit:
result.encoding_data = lxb_encoding_data(DOM_FALLBACK_ENCODING_ID);
result.bom_shift = 0;
- lxb_html_encoding_destroy(&encoding, false);
return result;
}
-static void dom_setup_parser_encoding(const lxb_char_t **buf_ref, size_t *read, dom_decoding_encoding_ctx *decoding_encoding_ctx)
+static void dom_setup_parser_encoding_manually(const lxb_char_t *buf_start, const lxb_encoding_data_t *encoding_data, dom_decoding_encoding_ctx *decoding_encoding_ctx, dom_lexbor_libxml2_bridge_application_data *application_data)
{
static const lxb_codepoint_t replacement_codepoint = LXB_ENCODING_REPLACEMENT_CODEPOINT;
- dom_character_encoding_data dom_encoding_data = dom_determine_encoding((const char *) *buf_ref, *read);
- *buf_ref += dom_encoding_data.bom_shift;
- *read -= dom_encoding_data.bom_shift;
- decoding_encoding_ctx->decode_data = dom_encoding_data.encoding_data;
- if (decoding_encoding_ctx->decode_data == NULL) {
- decoding_encoding_ctx->decode_data = lxb_encoding_data(DOM_FALLBACK_ENCODING_ID);
- ZEND_ASSERT(decoding_encoding_ctx->decode_data != NULL);
- }
+ decoding_encoding_ctx->decode_data = encoding_data;
+
(void) lxb_encoding_decode_init(&decoding_encoding_ctx->decode, decoding_encoding_ctx->decode_data, decoding_encoding_ctx->codepoints, sizeof(decoding_encoding_ctx->codepoints) / sizeof(lxb_codepoint_t));
(void) lxb_encoding_decode_replace_set(&decoding_encoding_ctx->decode, &replacement_codepoint, LXB_ENCODING_REPLACEMENT_BUFFER_LEN);
decoding_encoding_ctx->fast_path = decoding_encoding_ctx->decode_data == decoding_encoding_ctx->encode_data; /* Note: encode_data is for UTF-8 */
+
+ if (decoding_encoding_ctx->fast_path) {
+ application_data->current_input_codepoints = NULL;
+ application_data->current_input_characters = (const char *) buf_start;
+ } else {
+ application_data->current_input_codepoints = decoding_encoding_ctx->codepoints;
+ application_data->current_input_characters = NULL;
+ }
+}
+
+static void dom_setup_parser_encoding_implicitly(const lxb_char_t **buf_ref, size_t *read, dom_decoding_encoding_ctx *decoding_encoding_ctx, dom_lexbor_libxml2_bridge_application_data *application_data)
+{
+ const char *buf_start = (const char *) *buf_ref;
+ dom_character_encoding_data dom_encoding_data = dom_determine_encoding(buf_start, *read);
+ *buf_ref += dom_encoding_data.bom_shift;
+ *read -= dom_encoding_data.bom_shift;
+ dom_setup_parser_encoding_manually((const lxb_char_t *) buf_start, dom_encoding_data.encoding_data, decoding_encoding_ctx, application_data);
}
static bool dom_process_parse_chunk(lexbor_libxml2_bridge_parse_context *ctx, lxb_html_document_t *document, lxb_html_parser_t *parser, size_t encoded_length, const lxb_char_t *encoding_output, size_t input_buffer_length, size_t *tokenizer_error_offset, size_t *tree_error_offset)
@@ -548,10 +563,10 @@ PHP_METHOD(DOM_HTMLDocument, createEmpty)
PHP_METHOD(DOM_HTMLDocument, createFromString)
{
- const char *source;
- size_t source_len;
+ const char *source, *override_encoding = NULL;
+ size_t source_len, override_encoding_len;
zend_long options = 0;
- if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|l", &source, &source_len, &options) == FAILURE) {
+ if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|lp!", &source, &source_len, &options, &override_encoding, &override_encoding_len) == FAILURE) {
RETURN_THROWS();
}
@@ -571,6 +586,24 @@ PHP_METHOD(DOM_HTMLDocument, createFromString)
}
ctx.application_data = &application_data;
+ size_t tokenizer_error_offset = 0;
+ size_t tree_error_offset = 0;
+
+ /* Setup everything encoding & decoding related */
+ const lxb_char_t *buf_ref = (const lxb_char_t *) source;
+ dom_decoding_encoding_ctx decoding_encoding_ctx;
+ dom_decoding_encoding_ctx_init(&decoding_encoding_ctx);
+ if (override_encoding != NULL) {
+ const lxb_encoding_data_t *encoding_data = lxb_encoding_data_by_name((const lxb_char_t *) override_encoding, override_encoding_len);
+ if (!encoding_data) {
+ zend_argument_value_error(3, "must be a valid document encoding");
+ RETURN_THROWS();
+ }
+ dom_setup_parser_encoding_manually(buf_ref, encoding_data, &decoding_encoding_ctx, &application_data);
+ } else {
+ dom_setup_parser_encoding_implicitly(&buf_ref, &source_len, &decoding_encoding_ctx, &application_data);
+ }
+
lxb_html_document_t *document = lxb_html_document_create();
if (UNEXPECTED(document == NULL)) {
goto fail_oom;
@@ -581,24 +614,7 @@ PHP_METHOD(DOM_HTMLDocument, createFromString)
goto fail_oom;
}
- /* Setup everything encoding & decoding related */
- dom_decoding_encoding_ctx decoding_encoding_ctx;
- dom_decoding_encoding_ctx_init(&decoding_encoding_ctx);
-
lxb_html_parser_t *parser = document->dom_document.parser;
- size_t tokenizer_error_offset = 0;
- size_t tree_error_offset = 0;
-
- const lxb_char_t *buf_ref = (const lxb_char_t *) source;
- dom_setup_parser_encoding(&buf_ref, &source_len, &decoding_encoding_ctx);
-
- if (decoding_encoding_ctx.fast_path) {
- application_data.current_input_codepoints = NULL;
- application_data.current_input_characters = source;
- } else {
- application_data.current_input_codepoints = decoding_encoding_ctx.codepoints;
- application_data.current_input_characters = NULL;
- }
while (source_len > 0) {
size_t chunk_size = source_len;
@@ -653,11 +669,11 @@ PHP_METHOD(DOM_HTMLDocument, createFromString)
PHP_METHOD(DOM_HTMLDocument, createFromFile)
{
- const char *filename;
- size_t filename_len;
+ const char *filename, *override_encoding = NULL;
+ size_t filename_len, override_encoding_len;
zend_long options = 0;
php_stream *stream = NULL;
- if (zend_parse_parameters(ZEND_NUM_ARGS(), "p|l", &filename, &filename_len, &options) == FAILURE) {
+ if (zend_parse_parameters(ZEND_NUM_ARGS(), "p|ls!", &filename, &filename_len, &options, &override_encoding, &override_encoding_len) == FAILURE) {
RETURN_THROWS();
}
@@ -683,6 +699,22 @@ PHP_METHOD(DOM_HTMLDocument, createFromFile)
}
ctx.application_data = &application_data;
+ char buf[4096];
+
+ /* Setup everything encoding & decoding related */
+ dom_decoding_encoding_ctx decoding_encoding_ctx;
+ dom_decoding_encoding_ctx_init(&decoding_encoding_ctx);
+ bool should_determine_encoding_implicitly = true; /* First read => determine encoding implicitly */
+ if (override_encoding != NULL) {
+ const lxb_encoding_data_t *encoding_data = lxb_encoding_data_by_name((const lxb_char_t *) override_encoding, override_encoding_len);
+ if (!encoding_data) {
+ zend_argument_value_error(3, "must be a valid document encoding");
+ RETURN_THROWS();
+ }
+ should_determine_encoding_implicitly = false;
+ dom_setup_parser_encoding_manually((const lxb_char_t *) buf, encoding_data, &decoding_encoding_ctx, &application_data);
+ }
+
// TODO: context from LIBXML(stream_context) ???
// TODO: https://mimesniff.spec.whatwg.org/#parsing-a-mime-type
stream = php_stream_open_wrapper_ex(filename, "rb", REPORT_ERRORS, /* opened_path */ NULL, /* context */ NULL);
@@ -703,31 +735,17 @@ PHP_METHOD(DOM_HTMLDocument, createFromFile)
goto fail_oom;
}
- /* Setup everything encoding & decoding related */
- bool first_read = true;
- dom_decoding_encoding_ctx decoding_encoding_ctx;
- dom_decoding_encoding_ctx_init(&decoding_encoding_ctx);
-
size_t tokenizer_error_offset = 0;
size_t tree_error_offset = 0;
ssize_t read;
- char buf[4096];
lxb_html_parser_t *parser = document->dom_document.parser;
while ((read = php_stream_read(stream, buf, sizeof(buf))) > 0) {
const lxb_char_t *buf_ref = (const lxb_char_t *) buf;
- /* First read => determine encoding */
- if (first_read) {
- first_read = false;
- dom_setup_parser_encoding(&buf_ref, (size_t *) &read, &decoding_encoding_ctx);
- if (decoding_encoding_ctx.fast_path) {
- application_data.current_input_codepoints = NULL;
- application_data.current_input_characters = buf;
- } else {
- application_data.current_input_codepoints = decoding_encoding_ctx.codepoints;
- application_data.current_input_characters = NULL;
- }
+ if (should_determine_encoding_implicitly) {
+ should_determine_encoding_implicitly = false;
+ dom_setup_parser_encoding_implicitly(&buf_ref, (size_t *) &read, &decoding_encoding_ctx, &application_data);
}
const lxb_char_t *buf_end = buf_ref + read;
@@ -771,7 +789,7 @@ PHP_METHOD(DOM_HTMLDocument, createFromFile)
if (UNEXPECTED(!converted)) {
goto fail_oom;
}
- /* Check for "file:/"" instead of "file://" because of libxml2 quirk */
+ /* Check for "file:/" instead of "file://" because of libxml2 quirk */
if (strncmp((const char *) converted, "file:/", sizeof("file:/") - 1) != 0) {
xmlChar *buffer = xmlStrdup((const xmlChar *) "file://");
if (UNEXPECTED(!buffer)) {
diff --git a/ext/dom/php_dom.h b/ext/dom/php_dom.h
index fe9dafaf70018..29b6927dc0cfb 100644
--- a/ext/dom/php_dom.h
+++ b/ext/dom/php_dom.h
@@ -163,7 +163,7 @@ dom_object *php_dom_instantiate_object_helper(zval *return_value, zend_class_ent
#define DOM_LOAD_STRING 0
#define DOM_LOAD_FILE 1
-xmlDocPtr dom_document_parser(zval *id, int mode, const char *source, size_t source_len, size_t options);
+xmlDocPtr dom_document_parser(zval *id, int mode, const char *source, size_t source_len, size_t options, xmlCharEncodingHandlerPtr encoding);
/* parentnode */
void dom_parent_node_prepend(dom_object *context, zval *nodes, uint32_t nodesc);
diff --git a/ext/dom/php_dom.stub.php b/ext/dom/php_dom.stub.php
index 95506befbab3d..927a392e374d1 100644
--- a/ext/dom/php_dom.stub.php
+++ b/ext/dom/php_dom.stub.php
@@ -1139,9 +1139,9 @@ private function __construct() {}
public static function createEmpty(string $encoding = "UTF-8"): HTMLDocument {}
- public static function createFromFile(string $path, int $options = 0): HTMLDocument {}
+ public static function createFromFile(string $path, int $options = 0, ?string $override_encoding = null): HTMLDocument {}
- public static function createFromString(string $source, int $options = 0): HTMLDocument {}
+ public static function createFromString(string $source, int $options = 0, ?string $override_encoding = null): HTMLDocument {}
/** @implementation-alias DOMDocument::saveXML */
public function saveXML(?\DOMNode $node = null, int $options = 0): string|false {}
@@ -1161,9 +1161,9 @@ private function __construct() {}
public static function createEmpty(string $version = "1.0", string $encoding = "UTF-8"): XMLDocument {}
- public static function createFromFile(string $path, int $options = 0): XMLDocument {}
+ public static function createFromFile(string $path, int $options = 0, ?string $override_encoding = null): XMLDocument {}
- public static function createFromString(string $source, int $options = 0): XMLDocument {}
+ public static function createFromString(string $source, int $options = 0, ?string $override_encoding = null): XMLDocument {}
/** @readonly */
public ?string $xmlEncoding;
diff --git a/ext/dom/php_dom_arginfo.h b/ext/dom/php_dom_arginfo.h
index 95cb28e294f72..177f86510dcbd 100644
--- a/ext/dom/php_dom_arginfo.h
+++ b/ext/dom/php_dom_arginfo.h
@@ -1,5 +1,5 @@
/* This is a generated file, edit the .stub.php file instead.
- * Stub hash: 8493d85f8aa611f9739be2d59568d98ea4506522 */
+ * Stub hash: bc74a857ba008dd3ba6b86256c154050b7c07552 */
ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(arginfo_dom_import_simplexml, 0, 1, DOMElement, 0)
ZEND_ARG_TYPE_INFO(0, node, IS_OBJECT, 0)
@@ -554,11 +554,13 @@ ZEND_END_ARG_INFO()
ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(arginfo_class_DOM_HTMLDocument_createFromFile, 0, 1, DOM\\HTMLDocument, 0)
ZEND_ARG_TYPE_INFO(0, path, IS_STRING, 0)
ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, options, IS_LONG, 0, "0")
+ ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, override_encoding, IS_STRING, 1, "null")
ZEND_END_ARG_INFO()
ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(arginfo_class_DOM_HTMLDocument_createFromString, 0, 1, DOM\\HTMLDocument, 0)
ZEND_ARG_TYPE_INFO(0, source, IS_STRING, 0)
ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, options, IS_LONG, 0, "0")
+ ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, override_encoding, IS_STRING, 1, "null")
ZEND_END_ARG_INFO()
ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_class_DOM_HTMLDocument_saveXML, 0, 0, MAY_BE_STRING|MAY_BE_FALSE)
@@ -589,11 +591,13 @@ ZEND_END_ARG_INFO()
ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(arginfo_class_DOM_XMLDocument_createFromFile, 0, 1, DOM\\XMLDocument, 0)
ZEND_ARG_TYPE_INFO(0, path, IS_STRING, 0)
ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, options, IS_LONG, 0, "0")
+ ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, override_encoding, IS_STRING, 1, "null")
ZEND_END_ARG_INFO()
ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(arginfo_class_DOM_XMLDocument_createFromString, 0, 1, DOM\\XMLDocument, 0)
ZEND_ARG_TYPE_INFO(0, source, IS_STRING, 0)
ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, options, IS_LONG, 0, "0")
+ ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, override_encoding, IS_STRING, 1, "null")
ZEND_END_ARG_INFO()
#define arginfo_class_DOM_XMLDocument_createEntityReference arginfo_class_DOMDocument_createEntityReference
diff --git a/ext/dom/tests/modern/html/encoding/HTMLDocument_createFromFile_override_encoding.phpt b/ext/dom/tests/modern/html/encoding/HTMLDocument_createFromFile_override_encoding.phpt
new file mode 100644
index 0000000000000..041447cf4bb7c
--- /dev/null
+++ b/ext/dom/tests/modern/html/encoding/HTMLDocument_createFromFile_override_encoding.phpt
@@ -0,0 +1,31 @@
+--TEST--
+DOM\HTMLDocument::createFromFile() with override_encoding
+--EXTENSIONS--
+dom
+--FILE--
+getMessage(), "\n";
+}
+
+$dom = DOM\HTMLDocument::createFromFile(__DIR__ . '/gb18030_without_charset.html', override_encoding: 'GB18030');
+var_dump($dom->documentElement->lastChild->textContent);
+var_dump($dom->encoding);
+
+$dom = DOM\HTMLDocument::createFromFile(__DIR__ . '/fallback_encoding.html', override_encoding: 'Windows-1252');
+var_dump($dom->documentElement->lastChild->textContent);
+var_dump($dom->encoding);
+
+?>
+--EXPECT--
+DOM\HTMLDocument::createFromFile(): Argument #3 ($override_encoding) must be a valid document encoding
+string(20) "
+ Héllo, world!
+"
+string(7) "gb18030"
+string(1) "
+"
+string(12) "windows-1252"
diff --git a/ext/dom/tests/modern/html/encoding/HTMLDocument_createFromString_override_encoding.phpt b/ext/dom/tests/modern/html/encoding/HTMLDocument_createFromString_override_encoding.phpt
new file mode 100644
index 0000000000000..c6382a3fae900
--- /dev/null
+++ b/ext/dom/tests/modern/html/encoding/HTMLDocument_createFromString_override_encoding.phpt
@@ -0,0 +1,31 @@
+--TEST--
+DOM\HTMLDocument::createFromString() with override_encoding
+--EXTENSIONS--
+dom
+--FILE--
+getMessage(), "\n";
+}
+
+$dom = DOM\HTMLDocument::createFromString(file_get_contents(__DIR__ . '/gb18030_without_charset.html'), override_encoding: 'GB18030');
+var_dump($dom->documentElement->lastChild->textContent);
+var_dump($dom->encoding);
+
+$dom = DOM\HTMLDocument::createFromString(file_get_contents(__DIR__ . '/fallback_encoding.html'), override_encoding: 'Windows-1252');
+var_dump($dom->documentElement->lastChild->textContent);
+var_dump($dom->encoding);
+
+?>
+--EXPECT--
+DOM\HTMLDocument::createFromString(): Argument #3 ($override_encoding) must be a valid document encoding
+string(20) "
+ Héllo, world!
+"
+string(7) "gb18030"
+string(1) "
+"
+string(12) "windows-1252"
diff --git a/ext/dom/tests/modern/html/encoding/gb18030_without_charset.html b/ext/dom/tests/modern/html/encoding/gb18030_without_charset.html
new file mode 100644
index 0000000000000..4140d3381f3f4
--- /dev/null
+++ b/ext/dom/tests/modern/html/encoding/gb18030_without_charset.html
@@ -0,0 +1,7 @@
+
+
+No charset!
+
+
+ Hllo, world!
+
\ No newline at end of file
diff --git a/ext/dom/tests/modern/html/parser/HTMLDocument_createFromFile_createFromString_BOM_buffer_edge.phpt b/ext/dom/tests/modern/html/parser/HTMLDocument_createFromFile_createFromString_BOM_buffer_edge.phpt
new file mode 100644
index 0000000000000..8d7d70e6d9f8c
--- /dev/null
+++ b/ext/dom/tests/modern/html/parser/HTMLDocument_createFromFile_createFromString_BOM_buffer_edge.phpt
@@ -0,0 +1,26 @@
+--TEST--
+DOM\HTMLDocument::createFromFile()/createFromString() BOM with a buffer on the edge
+--EXTENSIONS--
+dom
+--FILE--
+";
+$trailer = "";
+$data = $header . str_repeat("a", 4096 - strlen($header) - strlen($trailer)) . $trailer;
+
+$dom = DOM\HTMLDocument::createFromString($header . str_repeat("a", 4096 - strlen($header) - strlen($trailer)) . $trailer);
+var_dump($dom->textContent);
+
+file_put_contents(__DIR__ . "/BOM_edge.tmp", $data);
+$dom = DOM\HTMLDocument::createFromFile(__DIR__ . "/BOM_edge.tmp");
+var_dump($dom->textContent);
+
+?>
+--CLEAN--
+
+--EXPECT--
+string(4052) "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+string(4052) "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
diff --git a/ext/dom/tests/modern/xml/XMLDocument_createFromFile_override_encoding.phpt b/ext/dom/tests/modern/xml/XMLDocument_createFromFile_override_encoding.phpt
new file mode 100644
index 0000000000000..088471d1c1106
--- /dev/null
+++ b/ext/dom/tests/modern/xml/XMLDocument_createFromFile_override_encoding.phpt
@@ -0,0 +1,28 @@
+--TEST--
+DOM\XMLDocument::createFromFile() with override_encoding
+--EXTENSIONS--
+dom
+--FILE--
+getMessage(), "\n";
+}
+
+$dom = DOM\XMLDocument::createFromFile(__DIR__ . '/dummy.xml', override_encoding: 'UTF-8');
+var_dump($dom->documentElement->lastChild->textContent);
+var_dump($dom->encoding);
+
+$dom = DOM\XMLDocument::createFromFile(__DIR__ . '/dummy.xml', override_encoding: 'Windows-1252');
+var_dump($dom->documentElement->lastChild->textContent);
+var_dump($dom->encoding);
+
+?>
+--EXPECT--
+DOM\XMLDocument::createFromFile(): Argument #3 ($override_encoding) must be a valid document encoding
+string(2) "é"
+NULL
+string(4) "é"
+NULL
diff --git a/ext/dom/tests/modern/xml/XMLDocument_createFromString_override_encoding.phpt b/ext/dom/tests/modern/xml/XMLDocument_createFromString_override_encoding.phpt
new file mode 100644
index 0000000000000..4247e3267b7f2
--- /dev/null
+++ b/ext/dom/tests/modern/xml/XMLDocument_createFromString_override_encoding.phpt
@@ -0,0 +1,28 @@
+--TEST--
+DOM\XMLDocument::createFromString() with override_encoding
+--EXTENSIONS--
+dom
+--FILE--
+getMessage(), "\n";
+}
+
+$dom = DOM\XMLDocument::createFromString(file_get_contents(__DIR__ . '/dummy.xml'), override_encoding: 'UTF-8');
+var_dump($dom->documentElement->lastChild->textContent);
+var_dump($dom->encoding);
+
+$dom = DOM\XMLDocument::createFromString(file_get_contents(__DIR__ . '/dummy.xml'), override_encoding: 'Windows-1252');
+var_dump($dom->documentElement->lastChild->textContent);
+var_dump($dom->encoding);
+
+?>
+--EXPECT--
+DOM\XMLDocument::createFromString(): Argument #3 ($override_encoding) must be a valid document encoding
+string(2) "é"
+NULL
+string(4) "é"
+NULL
diff --git a/ext/dom/tests/modern/xml/dummy.xml b/ext/dom/tests/modern/xml/dummy.xml
new file mode 100644
index 0000000000000..305c2f435d62c
--- /dev/null
+++ b/ext/dom/tests/modern/xml/dummy.xml
@@ -0,0 +1,2 @@
+
+é
diff --git a/ext/dom/xml_document.c b/ext/dom/xml_document.c
index 1e2c0595c30b6..9706de5a847fd 100644
--- a/ext/dom/xml_document.c
+++ b/ext/dom/xml_document.c
@@ -138,10 +138,10 @@ PHP_METHOD(DOM_XMLDocument, createEmpty)
static void load_from_helper(INTERNAL_FUNCTION_PARAMETERS, int mode)
{
- const char *source;
- size_t source_len;
+ const char *source, *override_encoding = NULL;
+ size_t source_len, override_encoding_len;
zend_long options = 0;
- if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|l", &source, &source_len, &options) == FAILURE) {
+ if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|lp!", &source, &source_len, &options, &override_encoding, &override_encoding_len) == FAILURE) {
RETURN_THROWS();
}
@@ -165,7 +165,17 @@ static void load_from_helper(INTERNAL_FUNCTION_PARAMETERS, int mode)
RETURN_THROWS();
}
- xmlDocPtr lxml_doc = dom_document_parser(NULL, mode, source, source_len, options);
+ xmlCharEncodingHandlerPtr encoding = NULL;
+ if (override_encoding != NULL) {
+ encoding = xmlFindCharEncodingHandler(override_encoding);
+ if (!encoding) {
+ zend_argument_value_error(3, "must be a valid document encoding");
+ RETURN_THROWS();
+ }
+ options |= XML_PARSE_IGNORE_ENC;
+ }
+
+ xmlDocPtr lxml_doc = dom_document_parser(NULL, mode, source, source_len, options, encoding);
if (UNEXPECTED(lxml_doc == NULL)) {
if (!EG(exception)) {
if (mode == DOM_LOAD_FILE) {
From 0cd2449168f9f6566d104109bad8f72fafd93816 Mon Sep 17 00:00:00 2001
From: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
Date: Wed, 4 Oct 2023 22:20:11 +0200
Subject: [PATCH 20/53] Make libxml stream context externally visible and use
it in html_document
---
ext/dom/html_document.c | 3 +--
ext/libxml/libxml.c | 7 ++++++-
ext/libxml/php_libxml.h | 1 +
3 files changed, 8 insertions(+), 3 deletions(-)
diff --git a/ext/dom/html_document.c b/ext/dom/html_document.c
index 7e99d743b3ea0..a4a328bdd72e3 100644
--- a/ext/dom/html_document.c
+++ b/ext/dom/html_document.c
@@ -715,9 +715,8 @@ PHP_METHOD(DOM_HTMLDocument, createFromFile)
dom_setup_parser_encoding_manually((const lxb_char_t *) buf, encoding_data, &decoding_encoding_ctx, &application_data);
}
- // TODO: context from LIBXML(stream_context) ???
// TODO: https://mimesniff.spec.whatwg.org/#parsing-a-mime-type
- stream = php_stream_open_wrapper_ex(filename, "rb", REPORT_ERRORS, /* opened_path */ NULL, /* context */ NULL);
+ stream = php_stream_open_wrapper_ex(filename, "rb", REPORT_ERRORS, /* opened_path */ NULL, /* context */ php_libxml_get_stream_context());
if (!stream) {
if (!EG(exception)) {
zend_throw_exception_ex(NULL, 0, "Cannot open file '%s'", filename);
diff --git a/ext/libxml/libxml.c b/ext/libxml/libxml.c
index 03fcbd6acf8af..a32f0e2a10297 100644
--- a/ext/libxml/libxml.c
+++ b/ext/libxml/libxml.c
@@ -369,6 +369,11 @@ static PHP_GINIT_FUNCTION(libxml)
libxml_globals->entity_loader_callback = empty_fcall_info_cache;
}
+PHP_LIBXML_API php_stream_context *php_libxml_get_stream_context(void)
+{
+ return php_stream_context_from_zval(Z_ISUNDEF(LIBXML(stream_context)) ? NULL : &LIBXML(stream_context), false);
+}
+
/* Channel libxml file io layer through the PHP streams subsystem.
* This allows use of ftps:// and https:// urls */
@@ -436,7 +441,7 @@ static void *php_libxml_streams_IO_open_wrapper(const char *filename, const char
}
}
- context = php_stream_context_from_zval(Z_ISUNDEF(LIBXML(stream_context))? NULL : &LIBXML(stream_context), 0);
+ context = php_libxml_get_stream_context();
ret_val = php_stream_open_wrapper_ex(path_to_open, (char *)mode, REPORT_ERRORS, NULL, context);
if (ret_val) {
diff --git a/ext/libxml/php_libxml.h b/ext/libxml/php_libxml.h
index fd8c76be0eb92..059b61d251641 100644
--- a/ext/libxml/php_libxml.h
+++ b/ext/libxml/php_libxml.h
@@ -139,6 +139,7 @@ PHP_LIBXML_API void php_libxml_switch_context(zval *context, zval *oldcontext);
PHP_LIBXML_API void php_libxml_issue_error(int level, const char *msg);
PHP_LIBXML_API bool php_libxml_disable_entity_loader(bool disable);
PHP_LIBXML_API void php_libxml_set_old_ns(xmlDocPtr doc, xmlNsPtr ns);
+PHP_LIBXML_API php_stream_context *php_libxml_get_stream_context(void);
/* Init/shutdown functions*/
PHP_LIBXML_API void php_libxml_initialize(void);
From 9edb7ebdc6524e002c622513ee449e22477192e5 Mon Sep 17 00:00:00 2001
From: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
Date: Sat, 7 Oct 2023 16:47:14 +0200
Subject: [PATCH 21/53] Nope: BC
---
ext/dom/html_document.c | 1 -
1 file changed, 1 deletion(-)
diff --git a/ext/dom/html_document.c b/ext/dom/html_document.c
index a4a328bdd72e3..2982bd4d6cfb4 100644
--- a/ext/dom/html_document.c
+++ b/ext/dom/html_document.c
@@ -783,7 +783,6 @@ PHP_METHOD(DOM_HTMLDocument, createFromFile)
}
if (stream->wrapper == &php_plain_files_wrapper) {
- // TODO: do the same for XMLDocument?
xmlChar *converted = xmlPathToURI((const xmlChar *) filename);
if (UNEXPECTED(!converted)) {
goto fail_oom;
From 1633f02f753de72f32d49c51d9d926b186d03dfe Mon Sep 17 00:00:00 2001
From: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
Date: Sat, 7 Oct 2023 20:31:48 +0200
Subject: [PATCH 22/53] Implement MIME sniff
---
ext/dom/html_document.c | 14 +-
...MLDocument_createFromFile_http_header.phpt | 96 ++++++
ext/libxml/config.w32 | 2 +-
ext/libxml/config0.m4 | 2 +-
ext/libxml/libxml.c | 48 +--
ext/libxml/mime_sniff.c | 323 ++++++++++++++++++
ext/libxml/php_libxml.h | 3 +
7 files changed, 444 insertions(+), 44 deletions(-)
create mode 100644 ext/dom/tests/modern/html/encoding/HTMLDocument_createFromFile_http_header.phpt
create mode 100644 ext/libxml/mime_sniff.c
diff --git a/ext/dom/html_document.c b/ext/dom/html_document.c
index 2982bd4d6cfb4..ddba2a0c0451b 100644
--- a/ext/dom/html_document.c
+++ b/ext/dom/html_document.c
@@ -715,7 +715,6 @@ PHP_METHOD(DOM_HTMLDocument, createFromFile)
dom_setup_parser_encoding_manually((const lxb_char_t *) buf, encoding_data, &decoding_encoding_ctx, &application_data);
}
- // TODO: https://mimesniff.spec.whatwg.org/#parsing-a-mime-type
stream = php_stream_open_wrapper_ex(filename, "rb", REPORT_ERRORS, /* opened_path */ NULL, /* context */ php_libxml_get_stream_context());
if (!stream) {
if (!EG(exception)) {
@@ -724,6 +723,19 @@ PHP_METHOD(DOM_HTMLDocument, createFromFile)
RETURN_THROWS();
}
+ /* MIME sniff */
+ if (should_determine_encoding_implicitly) {
+ zend_string *charset = php_libxml_sniff_charset_from_stream(stream);
+ if (charset != NULL) {
+ const lxb_encoding_data_t *encoding_data = lxb_encoding_data_by_name((const lxb_char_t *) ZSTR_VAL(charset), ZSTR_LEN(charset));
+ if (encoding_data != NULL) {
+ should_determine_encoding_implicitly = false;
+ dom_setup_parser_encoding_manually((const lxb_char_t *) buf, encoding_data, &decoding_encoding_ctx, &application_data);
+ }
+ zend_string_release_ex(charset, false);
+ }
+ }
+
lxb_html_document_t *document = lxb_html_document_create();
if (UNEXPECTED(document == NULL)) {
goto fail_oom;
diff --git a/ext/dom/tests/modern/html/encoding/HTMLDocument_createFromFile_http_header.phpt b/ext/dom/tests/modern/html/encoding/HTMLDocument_createFromFile_http_header.phpt
new file mode 100644
index 0000000000000..3209c3e5d6ae0
--- /dev/null
+++ b/ext/dom/tests/modern/html/encoding/HTMLDocument_createFromFile_http_header.phpt
@@ -0,0 +1,96 @@
+--TEST--
+DOM\HTMLDocument::createFromFile() HTTP header Content-Type
+--EXTENSIONS--
+dom
+--SKIPIF--
+
+--FILE--
+ [
+ "/html; Charset=\"ISO-8859-1\"",
+ "text/; Charset=\"ISO-8859-1\"",
+ "tex°t/html; Charset=\"ISO-8859-1\"",
+ "/; Charset=\"ISO-8859-1\"",
+ "$/€; Charset=\"ISO-8859-1\"",
+ "; Charset=\"ISO-8859-1\"",
+ ";",
+ "",
+ " \t",
+ ],
+ "Valid type/subtype without charset" => [
+ "text/html; x=ISO-8859-1",
+ "text/html; x=\"ISO-8859-1\"",
+ "text/html; charet=\"ISO-8859-1\"",
+ "text/html; chars et=\"ISO-8859-1\"",
+ ],
+ "All valid inputs" => [
+ "text/html; charset=ISO-8859-1",
+ "\t\r text/html; charset=ISO-8859-1 \t",
+ "text/html; foo=bar;charset=ISO-8859-1",
+ "text/html; foo=bar;charset=ISO-8859-1;bar=\"foooooo\"",
+ "text/html;;;; charset=ISO-8859-1",
+ "text/html; Charset=\"ISO-8859-1\"",
+ "text/html; Charset=\"ISO\\-8859-1\"",
+ "text/html; ;; ; ;; Charset=\"ISO-8859-1\"",
+ "text/html;Charset=\"ISO-8859-1",
+ "tex.t/h#\$%!&'*%2B-.^_`|~tml;Charset=\"ISO-8859-1\"", // Note: have to encode + as 2B because of implementation details of http_server()
+ ],
+ "Valid input, but invalid encoding name" => [
+ "text/html;Charset=\"ISO-8859-1\\",
+ "text/html;Charset=\"ISO-8859-1\\\"",
+ "text/html;Charset=\"foobar\\\"",
+ "text/html;Charset=\"\\\"",
+ "text/html;Charset=",
+ ],
+];
+
+foreach ($tests as $name => $headers) {
+ echo "--- $name ---\n";
+ $responses = array_map(fn ($header) => "data://text/plain,HTTP/1.1 200 OK\r\nContent-Type: " . $header . "\r\n\r\n" . "\xE4\xF6\xFC
\n", $headers);
+ ['pid' => $pid, 'uri' => $uri] = http_server($responses);
+ for ($i = 0; $i < count($responses); $i++) {
+ $result = DOM\HTMLDocument::createFromFile($uri, LIBXML_NOERROR);
+ echo $result->textContent;
+ }
+ http_server_kill($pid);
+}
+?>
+--EXPECT--
+--- Invalid type/subtype ---
+���
+���
+���
+���
+���
+���
+���
+���
+���
+--- Valid type/subtype without charset ---
+���
+���
+���
+���
+--- All valid inputs ---
+äöü
+äöü
+äöü
+äöü
+äöü
+äöü
+äöü
+äöü
+äöü
+äöü
+--- Valid input, but invalid encoding name ---
+���
+���
+���
+���
+���
diff --git a/ext/libxml/config.w32 b/ext/libxml/config.w32
index b11c57bc44a72..3a2a707f3e4b0 100644
--- a/ext/libxml/config.w32
+++ b/ext/libxml/config.w32
@@ -9,7 +9,7 @@ if (PHP_LIBXML == "yes") {
CHECK_HEADER_ADD_INCLUDE("libxml/tree.h", "CFLAGS_LIBXML", PHP_PHP_BUILD + "\\include\\libxml2") &&
ADD_EXTENSION_DEP('libxml', 'iconv')) {
- EXTENSION("libxml", "libxml.c", false /* never shared */, "/DZEND_ENABLE_STATIC_TSRMLS_CACHE=1");
+ EXTENSION("libxml", "libxml.c mime_sniff.c", false /* never shared */, "/DZEND_ENABLE_STATIC_TSRMLS_CACHE=1");
AC_DEFINE("HAVE_LIBXML", 1, "LibXML support");
ADD_FLAG("CFLAGS_LIBXML", "/D LIBXML_STATIC /D LIBXML_STATIC_FOR_DLL /D HAVE_WIN32_THREADS ");
if (!PHP_LIBXML_SHARED) {
diff --git a/ext/libxml/config0.m4 b/ext/libxml/config0.m4
index 044a58fa6246f..a594e350e1494 100644
--- a/ext/libxml/config0.m4
+++ b/ext/libxml/config0.m4
@@ -11,7 +11,7 @@ if test "$PHP_LIBXML" != "no"; then
PHP_SETUP_LIBXML(LIBXML_SHARED_LIBADD, [
AC_DEFINE(HAVE_LIBXML,1,[ ])
- PHP_NEW_EXTENSION(libxml, [libxml.c], $ext_shared,, -DZEND_ENABLE_STATIC_TSRMLS_CACHE=1)
+ PHP_NEW_EXTENSION(libxml, [libxml.c mime_sniff.c], $ext_shared,, -DZEND_ENABLE_STATIC_TSRMLS_CACHE=1)
PHP_INSTALL_HEADERS([ext/libxml/php_libxml.h])
])
fi
diff --git a/ext/libxml/libxml.c b/ext/libxml/libxml.c
index a32f0e2a10297..72e020707abcc 100644
--- a/ext/libxml/libxml.c
+++ b/ext/libxml/libxml.c
@@ -501,47 +501,13 @@ php_libxml_input_buffer_create_filename(const char *URI, xmlCharEncoding enc)
/* Check if there's been an external transport protocol with an encoding information */
if (enc == XML_CHAR_ENCODING_NONE) {
php_stream *s = (php_stream *) context;
-
- if (Z_TYPE(s->wrapperdata) == IS_ARRAY) {
- zval *header;
-
- ZEND_HASH_FOREACH_VAL_IND(Z_ARRVAL(s->wrapperdata), header) {
- const char buf[] = "Content-Type:";
- if (Z_TYPE_P(header) == IS_STRING &&
- !zend_binary_strncasecmp(Z_STRVAL_P(header), Z_STRLEN_P(header), buf, sizeof(buf)-1, sizeof(buf)-1)) {
- char needle[] = "charset=";
- char *haystack = estrndup(Z_STRVAL_P(header), Z_STRLEN_P(header));
- char *encoding = php_stristr(haystack, needle, Z_STRLEN_P(header), strlen(needle));
-
- if (encoding) {
- char *end;
-
- encoding += sizeof("charset=")-1;
- if (*encoding == '"') {
- encoding++;
- }
- end = strchr(encoding, ';');
- if (end == NULL) {
- end = encoding + strlen(encoding);
- }
- end--; /* end == encoding-1 isn't a buffer underrun */
- while (*end == ' ' || *end == '\t') {
- end--;
- }
- if (*end == '"') {
- end--;
- }
- if (encoding >= end) continue;
- *(end+1) = '\0';
- enc = xmlParseCharEncoding(encoding);
- if (enc <= XML_CHAR_ENCODING_NONE) {
- enc = XML_CHAR_ENCODING_NONE;
- }
- }
- efree(haystack);
- break; /* found content-type */
- }
- } ZEND_HASH_FOREACH_END();
+ zend_string *charset = php_libxml_sniff_charset_from_stream(s);
+ if (charset != NULL) {
+ enc = xmlParseCharEncoding(ZSTR_VAL(charset));
+ if (enc <= XML_CHAR_ENCODING_NONE) {
+ enc = XML_CHAR_ENCODING_NONE;
+ }
+ zend_string_release_ex(charset, false);
}
}
diff --git a/ext/libxml/mime_sniff.c b/ext/libxml/mime_sniff.c
new file mode 100644
index 0000000000000..634c7fba621f7
--- /dev/null
+++ b/ext/libxml/mime_sniff.c
@@ -0,0 +1,323 @@
+/*
+ +----------------------------------------------------------------------+
+ | Copyright (c) The PHP Group |
+ +----------------------------------------------------------------------+
+ | This source file is subject to version 3.01 of the PHP license, |
+ | that is bundled with this package in the file LICENSE, and is |
+ | available through the world-wide-web at the following url: |
+ | https://www.php.net/license/3_01.txt |
+ | If you did not receive a copy of the PHP license and are unable to |
+ | obtain it through the world-wide-web, please send a note to |
+ | license@php.net so we can mail you a copy immediately. |
+ +----------------------------------------------------------------------+
+ | Authors: Niels Dossche |
+ +----------------------------------------------------------------------+
+*/
+
+/* This file implements the "parse a MIME type" algorithm from https://mimesniff.spec.whatwg.org/#parsing-a-mime-type (Date: 2023-09-27)
+ * It is a strict implementation of the algorithm, i.e. it does not accept malformed headers.
+ * In particular, it exposes php_libxml_sniff_charset_from_string() and php_libxml_sniff_charset_from_stream()
+ * to parse the charset from the Content-Type header. */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "php.h"
+#ifdef HAVE_LIBXML
+
+#include "php_libxml.h"
+
+static bool is_not_slash(char c) /* Holds for every byte except the '/' that ends the MIME type part */
+{
+	return !(c == '/');
+}
+
+static bool is_not_semicolon(char c) /* Holds for every byte except the ';' parameter separator */
+{
+	return !(c == ';');
+}
+
+static bool is_not_semicolon_or_equals(char c) /* Holds for every byte except ';' and '=' (used for parameter names) */
+{
+	return !(c == ';' || c == '=');
+}
+
+static bool is_not_quote_or_backslash(char c) /* Holds for every byte except '"' and '\\' (used inside quoted strings) */
+{
+	return !(c == '"' || c == '\\');
+}
+
+/* https://fetch.spec.whatwg.org/#http-tab-or-space: U+0009 TAB or U+0020 SPACE */
+static bool is_http_tab_or_space(char c)
+{
+	return c == ' ' || c == '\t';
+}
+
+/* https://fetch.spec.whatwg.org/#http-whitespace: LF, CR, or an HTTP tab or space */
+static bool is_http_whitespace(char c)
+{
+	return is_http_tab_or_space(c) || c == '\n' || c == '\r';
+}
+
+/* https://mimesniff.spec.whatwg.org/#http-quoted-string-token-code-point */
+static bool is_http_quoted_string_token(unsigned char c) /* Note: c must be unsigned so that bytes >= 0x80 pass the >= 0x20 comparison */
+{
+	return c == '\t' || (c != 0x7F && c >= ' ');
+}
+
+/* https://infra.spec.whatwg.org/#collect-a-sequence-of-code-points
+ * Nothing is copied: the result is the number of leading bytes of [position, end) for which condition holds */
+static zend_always_inline size_t collect_a_sequence_of_code_points(const char *position, const char *end, bool (*condition)(char))
+{
+	size_t length = 0;
+	while (position + length < end && condition(position[length])) {
+		length++;
+	}
+	return length;
+}
+
+/* https://fetch.spec.whatwg.org/#collect-an-http-quoted-string with extract-value always true.
+ * position must point at the opening '"' (asserted below). The collected value is returned as a newly allocated
+ * zend_string, with backslash escapes resolved, and *position_out is set to where collecting stopped. */
+static zend_string *collect_an_http_quoted_string_with_extract_value(const char *position, const char *end, const char **position_out)
+{
+	/* 1. Saving positionStart is not necessary, as in the extract-value == true variant we don't use it */
+	/* 2. Let value be the empty string */
+	zend_string *value = zend_string_alloc(end - position /* can't be longer than this */, false);
+	ZSTR_LEN(value) = 0;
+
+	/* 3. Assert */
+	ZEND_ASSERT(*position == '"');
+
+	/* 4. Advance */
+	position++;
+
+	/* 5. While true */
+	while (true) {
+		/* 5.1. Append the result of collect a sequence of code points that are not '"' or '\\' */
+		size_t length = collect_a_sequence_of_code_points(position, end, is_not_quote_or_backslash);
+		memcpy(ZSTR_VAL(value) + ZSTR_LEN(value), position, length);
+		ZSTR_LEN(value) += length;
+		position += length;
+
+		/* 5.2. Past end check: an unterminated quoted string ends at the end of the input */
+		if (position >= end) {
+			break;
+		}
+
+		/* 5.3. quoteOrBackslash is the code point at position */
+		char quote_or_backslash = *position;
+
+		/* 5.4. Advance */
+		position++;
+
+		/* 5.5. quote_or_backslash is '\\', deal with escaping */
+		if (quote_or_backslash == '\\') {
+			/* 5.5.1. Past end check: a trailing backslash is kept literally */
+			if (position >= end) {
+				ZSTR_VAL(value)[ZSTR_LEN(value)] = '\\';
+				ZSTR_LEN(value)++;
+				break;
+			}
+
+			/* 5.5.2. Append code point at position: the escaped byte is taken as-is */
+			ZSTR_VAL(value)[ZSTR_LEN(value)] = *position;
+			ZSTR_LEN(value)++;
+
+			/* 5.5.3. Advance */
+			position++;
+		} else {
+			/* 5.6. Otherwise: assert and break, this is the closing quote */
+			ZEND_ASSERT(quote_or_backslash == '"');
+			break;
+		}
+	}
+
+	ZSTR_VAL(value)[ZSTR_LEN(value)] = '\0';
+
+	*position_out = position;
+
+	/* 6. extract-value is always true, return value (step 7 is not needed because we always return here already) */
+	return value;
+}
+
+/* https://infra.spec.whatwg.org/#ascii-alphanumeric: an ASCII letter of either case or an ASCII digit */
+static bool is_ascii_alpha_numeric(char c)
+{
+	return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || ('0' <= c && c <= '9');
+}
+
+/* https://mimesniff.spec.whatwg.org/#http-token-code-point */
+static bool is_http_token(char c)
+{
+	switch (c) {
+		case '!': case '#': case '$': case '%': case '&': case '\'': case '*': case '+': case '-': case '.':
+		case '^': case '_': case '`': case '|': case '~':
+			return true;
+		default:
+			return is_ascii_alpha_numeric(c);
+	}
+}
+
+/* True when [start, start + len) is empty or holds at least one byte that is not an HTTP token code point */
+static bool is_empty_string_or_does_not_solely_contain_http_token_code_points(const char *start, size_t len)
+{
+	if (len == 0) {
+		return true;
+	}
+
+	for (size_t i = 0; i < len; i++) {
+		if (!is_http_token(start[i])) {
+			return true;
+		}
+	}
+	return false;
+}
+
+/* True when every byte of [start, start + len) is an HTTP quoted-string token code point (vacuously so for len == 0) */
+static bool solely_contains_http_quoted_string_tokens(const char *start, size_t len)
+{
+	for (size_t i = 0; i < len; i++) {
+		if (!is_http_quoted_string_token(start[i])) {
+			return false;
+		}
+	}
+
+	return true;
+}
+
+/* https://mimesniff.spec.whatwg.org/#parsing-a-mime-type, reduced to charset detection on [start, end) (end is exclusive).
+ * Returns the first valid "charset" parameter value as a new zend_string the caller must release, or NULL (malformed type or no such parameter). */
+PHP_LIBXML_API zend_string *php_libxml_sniff_charset_from_string(const char *start, const char *end)
+{
+	/* 1. Remove leading & trailing HTTP whitespace; end is exclusive, so the last byte lives at end - 1 */
+	while (start < end && is_http_whitespace(*start)) {
+		start++;
+	}
+	while (start < end && is_http_whitespace(*(end - 1))) {
+		end--;
+	}
+
+	/* 2. Position variable: no-op because we move the start pointer instead */
+	/* 3. Collect sequence of code points that are not '/' (for type) */
+	size_t type_length = collect_a_sequence_of_code_points(start, end, is_not_slash);
+
+	/* 4. Empty string or not solely http tokens */
+	if (is_empty_string_or_does_not_solely_contain_http_token_code_points(start, type_length)) {
+		return NULL;
+	}
+	start += type_length;
+
+	/* 5. Failure if past end of input (note: end is one past the last char; in practice this is only possible if no '/' was found) */
+	if (start >= end) {
+		return NULL;
+	}
+	/* 6. Skip '/' */
+	start++;
+
+	/* 7. Collect sequence of code points that are not ';' (for subtype) */
+	size_t subtype_length = collect_a_sequence_of_code_points(start, end, is_not_semicolon);
+	/* 8. Remove trailing HTTP whitespace from subtype: only the validation in step 9 uses the trimmed length */
+	size_t subtype_token_length = subtype_length;
+	while (subtype_token_length > 0 && is_http_whitespace(start[subtype_token_length - 1])) {
+		subtype_token_length--;
+	}
+
+	/* 9. Empty string or not solely http tokens */
+	if (is_empty_string_or_does_not_solely_contain_http_token_code_points(start, subtype_token_length)) {
+		return NULL;
+	}
+	start += subtype_length;
+
+	/* 10. Initialise stuff, no-op as well as we don't care about anything other than charset */
+	/* 11. Loop with check: position not past end */
+	while (start < end) {
+		/* 11.1. Advance position */
+		start++;
+
+		/* 11.2. Collect sequence that *is* HTTP whitespace */
+		size_t whitespace_length = collect_a_sequence_of_code_points(start, end, is_http_whitespace);
+		start += whitespace_length;
+
+		/* 11.3. Collect a sequence of code points that are not ';' or '=' (for parameterName) */
+		size_t parameter_name_length = collect_a_sequence_of_code_points(start, end, is_not_semicolon_or_equals);
+		const char *parameter_name = start;
+		start += parameter_name_length;
+
+		/* 11.4. Convert parameter_name to ASCII lowercase, no-op because we are only interested in charset which we'll match down below */
+
+		/* 11.5. Position past input check: a ';' here means the parameter has no value, otherwise skip the '=' */
+		if (start < end) {
+			if (*start == ';') {
+				continue;
+			}
+			start++;
+		}
+		if (start >= end) { /* 11.6. Also covers input ending right after '=': there is no value byte left to read */
+			break;
+		}
+
+		/* 11.7. Let parameterValue be null */
+		zend_string *parameter_value = NULL;
+
+		/* 11.8. Quoted string check */
+		if (*start == '"') {
+			/* 11.8.1. Set parameterValue to the result of collecting an HTTP quoted string */
+			parameter_value = collect_an_http_quoted_string_with_extract_value(start, end, &start);
+
+			/* 11.8.2. Collect a sequence of code points that are not ';' */
+			start += collect_a_sequence_of_code_points(start, end, is_not_semicolon);
+		} else {
+			/* 9. Otherwise */
+			/* 9.1. Set parameterValue to the result of collecting a sequence of code points that are not ';' */
+			size_t parameter_value_length = collect_a_sequence_of_code_points(start, end, is_not_semicolon);
+			parameter_value = zend_string_init(start, parameter_value_length, false);
+			start += parameter_value_length;
+
+			/* 9.2. Remove trailing HTTP whitespace from parameterValue */
+			while (ZSTR_LEN(parameter_value) > 0 && is_http_whitespace(ZSTR_VAL(parameter_value)[ZSTR_LEN(parameter_value) - 1])) {
+				ZSTR_LEN(parameter_value)--;
+			}
+			ZSTR_VAL(parameter_value)[ZSTR_LEN(parameter_value)] = '\0';
+
+			/* 9.3. Continue if parameterValue is empty */
+			if (ZSTR_LEN(parameter_value) == 0) {
+				zend_string_release_ex(parameter_value, false);
+				continue;
+			}
+		}
+
+		/* 10. We diverge from the spec here: we're only interested in charset.
+		 * Furthermore, as only the first match matters, we can stop immediately with the loop once we set the charset. */
+		if (parameter_name_length == strlen("charset")
+			&& strncasecmp(parameter_name, "charset", strlen("charset")) == 0 /* Because of lowercasing in step 11.4 */
+			&& solely_contains_http_quoted_string_tokens(ZSTR_VAL(parameter_value), ZSTR_LEN(parameter_value))) {
+			return parameter_value;
+		}
+
+		zend_string_release_ex(parameter_value, false);
+	}
+
+	/* 12. Return mimetype, a no-op / spec divergence */
+	return NULL;
+}
+
+PHP_LIBXML_API zend_string *php_libxml_sniff_charset_from_stream(const php_stream *s)
+{
+	if (Z_TYPE(s->wrapperdata) != IS_ARRAY) {
+		return NULL;
+	}
+	zval *entry;
+	ZEND_HASH_FOREACH_VAL_IND(Z_ARRVAL(s->wrapperdata), entry) {
+		const char prefix[] = "Content-Type:";
+		const size_t prefix_length = sizeof(prefix) - 1;
+		if (Z_TYPE_P(entry) == IS_STRING && zend_binary_strncasecmp(Z_STRVAL_P(entry), Z_STRLEN_P(entry), prefix, prefix_length, prefix_length) == 0) {
+			/* The first matching entry decides the result; its value starts right after the header name */
+			return php_libxml_sniff_charset_from_string(Z_STRVAL_P(entry) + prefix_length, Z_STRVAL_P(entry) + Z_STRLEN_P(entry));
+		}
+	} ZEND_HASH_FOREACH_END();
+	return NULL;
+}
+
+#endif /* HAVE_LIBXML */
diff --git a/ext/libxml/php_libxml.h b/ext/libxml/php_libxml.h
index 059b61d251641..3e0b6f7dcf563 100644
--- a/ext/libxml/php_libxml.h
+++ b/ext/libxml/php_libxml.h
@@ -141,6 +141,9 @@ PHP_LIBXML_API bool php_libxml_disable_entity_loader(bool disable);
PHP_LIBXML_API void php_libxml_set_old_ns(xmlDocPtr doc, xmlNsPtr ns);
PHP_LIBXML_API php_stream_context *php_libxml_get_stream_context(void);
+PHP_LIBXML_API zend_string *php_libxml_sniff_charset_from_string(const char *start, const char *end);
+PHP_LIBXML_API zend_string *php_libxml_sniff_charset_from_stream(const php_stream *s);
+
/* Init/shutdown functions*/
PHP_LIBXML_API void php_libxml_initialize(void);
PHP_LIBXML_API void php_libxml_shutdown(void);
From cd00da0ca6b5e3104d461cf6122622eb3920fe99 Mon Sep 17 00:00:00 2001
From: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
Date: Thu, 12 Oct 2023 19:32:13 +0200
Subject: [PATCH 23/53] Fix crash if document is uninitialized
---
ext/dom/document.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/ext/dom/document.c b/ext/dom/document.c
index 517fdd09810e4..ba696eb2f5758 100644
--- a/ext/dom/document.c
+++ b/ext/dom/document.c
@@ -1323,12 +1323,13 @@ static void php_dom_finish_loading_document(zval *this, zval *return_value, xmlD
dom_object *intern = Z_DOMOBJ_P(this);
size_t old_modification_nr = 0;
if (intern != NULL) {
- bool is_modern_api_class = intern->document->is_modern_api_class;
+ bool is_modern_api_class = false;
xmlDocPtr docp = (xmlDocPtr) dom_object_get_node(intern);
dom_doc_propsptr doc_prop = NULL;
if (docp != NULL) {
const php_libxml_ref_obj *doc_ptr = intern->document;
ZEND_ASSERT(doc_ptr != NULL); /* Must exist, we have a document */
+ is_modern_api_class = doc_ptr->is_modern_api_class;
old_modification_nr = doc_ptr->cache_tag.modification_nr;
php_libxml_decrement_node_ptr((php_libxml_node_object *) intern);
doc_prop = intern->document->doc_props;
From 1a9d74c2a3575402b1783f6a24db8a166f47400b Mon Sep 17 00:00:00 2001
From: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
Date: Thu, 19 Oct 2023 00:37:51 +0200
Subject: [PATCH 24/53] Fix test output due to class changes in this RFC
---
ext/dom/tests/registerNodeClass_abstract_class.phpt | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/ext/dom/tests/registerNodeClass_abstract_class.phpt b/ext/dom/tests/registerNodeClass_abstract_class.phpt
index 24124d712ea09..3c384f3f5d522 100644
--- a/ext/dom/tests/registerNodeClass_abstract_class.phpt
+++ b/ext/dom/tests/registerNodeClass_abstract_class.phpt
@@ -21,4 +21,4 @@ $dom->createElement("foo");
?>
--EXPECT--
-ValueError: DOMDocument::registerNodeClass(): Argument #2 ($extendedClass) must not be an abstract class
+ValueError: DOM\Document::registerNodeClass(): Argument #2 ($extendedClass) must not be an abstract class
From cd037b058fd1832a70e0dec43a92f0d229ab2e42 Mon Sep 17 00:00:00 2001
From: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
Date: Thu, 19 Oct 2023 16:40:44 +0200
Subject: [PATCH 25/53] rename tests
---
.../modern/html/serializer/HTMLDocument_serialize_text_02.phpt | 2 +-
.../modern/html/serializer/HTMLDocument_serialize_text_03.phpt | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_text_02.phpt b/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_text_02.phpt
index 4b9ead2e723cd..31e12bbd6ef47 100644
--- a/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_text_02.phpt
+++ b/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_text_02.phpt
@@ -1,5 +1,5 @@
--TEST--
-DOM\HTMLDocument serialization escape text 02
+DOM\HTMLDocument serialization escape text 02 - special tags in html namespace
--EXTENSIONS--
dom
--FILE--
diff --git a/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_text_03.phpt b/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_text_03.phpt
index d9c8b39095544..fee3c34b3c5dd 100644
--- a/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_text_03.phpt
+++ b/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_text_03.phpt
@@ -1,5 +1,5 @@
--TEST--
-DOM\HTMLDocument serialization escape text 03
+DOM\HTMLDocument serialization escape text 03 - special tags in namespace should encode content
--EXTENSIONS--
dom
--FILE--
From 1da1a6e93e3ac3ee9c3fea6e2d760f3ff70b4cd8 Mon Sep 17 00:00:00 2001
From: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
Date: Thu, 19 Oct 2023 19:51:47 +0200
Subject: [PATCH 26/53] Cleanup: encoding is always set for the new
HTMLDocument class
---
ext/dom/html_document.c | 9 +--------
1 file changed, 1 insertion(+), 8 deletions(-)
diff --git a/ext/dom/html_document.c b/ext/dom/html_document.c
index ddba2a0c0451b..968685ee57744 100644
--- a/ext/dom/html_document.c
+++ b/ext/dom/html_document.c
@@ -885,14 +885,7 @@ static zend_result dom_common_save(dom_output_ctx *output_ctx, const xmlDoc *doc
{
/* Initialize everything related to encoding & decoding */
const lxb_encoding_data_t *decoding_data = lxb_encoding_data(LXB_ENCODING_UTF_8);
- const lxb_encoding_data_t *encoding_data = NULL;
- if (docp->encoding != NULL) {
- encoding_data = lxb_encoding_data_by_name((const lxb_char_t *) docp->encoding, strlen((const char *) docp->encoding));
- }
- if (encoding_data == NULL) {
- encoding_data = lxb_encoding_data(DOM_FALLBACK_ENCODING_ID);
- ZEND_ASSERT(encoding_data != NULL);
- }
+ const lxb_encoding_data_t *encoding_data = lxb_encoding_data_by_name((const lxb_char_t *) docp->encoding, strlen((const char *) docp->encoding));
lxb_encoding_encode_t encode;
lxb_encoding_decode_t decode;
lxb_char_t encoding_output[4096];
From f1fd15661f2d00de29900c237c8e99a0d4c12b13 Mon Sep 17 00:00:00 2001
From: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
Date: Fri, 20 Oct 2023 00:44:19 +0200
Subject: [PATCH 27/53] More and improved tests
---
.../HTMLDocument_saveHTMLFile_empty_path.phpt | 18 +++++++++
.../HTMLDocument_saveHTML_wrong_document.phpt | 17 +++++++++
.../html/interactions/getLineNo_65536.phpt | 20 ++++++++++
.../interactions/without_constructor.phpt | 20 ++++++++++
...HTMLDocument_serialize_ns_imported_02.phpt | 38 +++++++++++++++++--
...HTMLDocument_serialize_ns_imported_03.phpt | 14 ++++++-
...HTMLDocument_serialize_ns_imported_04.phpt | 14 ++++++-
...HTMLDocument_serialize_ns_imported_05.phpt | 38 +++++++++++++++++--
...HTMLDocument_serialize_ns_imported_06.phpt | 36 +++++++++++++++---
.../tests/modern/html/serializer/sample.xml | 12 ++++++
10 files changed, 210 insertions(+), 17 deletions(-)
create mode 100644 ext/dom/tests/modern/html/interactions/HTMLDocument_saveHTMLFile_empty_path.phpt
create mode 100644 ext/dom/tests/modern/html/interactions/HTMLDocument_saveHTML_wrong_document.phpt
create mode 100644 ext/dom/tests/modern/html/interactions/getLineNo_65536.phpt
create mode 100644 ext/dom/tests/modern/html/interactions/without_constructor.phpt
create mode 100644 ext/dom/tests/modern/html/serializer/sample.xml
diff --git a/ext/dom/tests/modern/html/interactions/HTMLDocument_saveHTMLFile_empty_path.phpt b/ext/dom/tests/modern/html/interactions/HTMLDocument_saveHTMLFile_empty_path.phpt
new file mode 100644
index 0000000000000..02514e54e07e1
--- /dev/null
+++ b/ext/dom/tests/modern/html/interactions/HTMLDocument_saveHTMLFile_empty_path.phpt
@@ -0,0 +1,18 @@
+--TEST--
+DOM\HTMLDocument::saveHTMLFile() empty path
+--EXTENSIONS--
+dom
+--FILE--
+appendChild($dom->createElement("root"));
+$dom->saveHTMLFile("");
+
+?>
+--EXPECTF--
+Fatal error: Uncaught ValueError: DOM\HTMLDocument::saveHTMLFile(): Argument #1 ($filename) must not be empty in %s:%d
+Stack trace:
+#0 %s(%d): DOM\HTMLDocument->saveHTMLFile('')
+#1 {main}
+ thrown in %s on line %d
diff --git a/ext/dom/tests/modern/html/interactions/HTMLDocument_saveHTML_wrong_document.phpt b/ext/dom/tests/modern/html/interactions/HTMLDocument_saveHTML_wrong_document.phpt
new file mode 100644
index 0000000000000..d2356068b4ece
--- /dev/null
+++ b/ext/dom/tests/modern/html/interactions/HTMLDocument_saveHTML_wrong_document.phpt
@@ -0,0 +1,17 @@
+--TEST--
+DOM\HTMLDocument::saveHTML() wrong document
+--EXTENSIONS--
+dom
+--FILE--
+saveHTML(DOM\HTMLDocument::createEmpty());
+
+?>
+--EXPECTF--
+Fatal error: Uncaught DOMException: Wrong Document Error in %s:%d
+Stack trace:
+#0 %s(%d): DOM\HTMLDocument->saveHTML(Object(DOM\HTMLDocument))
+#1 {main}
+ thrown in %s on line %d
diff --git a/ext/dom/tests/modern/html/interactions/getLineNo_65536.phpt b/ext/dom/tests/modern/html/interactions/getLineNo_65536.phpt
new file mode 100644
index 0000000000000..122bcc8aaaaef
--- /dev/null
+++ b/ext/dom/tests/modern/html/interactions/getLineNo_65536.phpt
@@ -0,0 +1,20 @@
+--TEST--
+getLineNo() returns the line number of the node >= 65536
+--EXTENSIONS--
+dom
+--FILE--
+
+
+ hello
+
+EOF;
+
+$dom = DOM\HTMLDocument::createFromString($html);
+var_dump($dom->documentElement->firstChild->nextSibling->firstChild->nextSibling->getLineNo());
+
+?>
+--EXPECT--
+int(65538)
diff --git a/ext/dom/tests/modern/html/interactions/without_constructor.phpt b/ext/dom/tests/modern/html/interactions/without_constructor.phpt
new file mode 100644
index 0000000000000..606ebfb8b286f
--- /dev/null
+++ b/ext/dom/tests/modern/html/interactions/without_constructor.phpt
@@ -0,0 +1,20 @@
+--TEST--
+Tests without running the constructor
+--EXTENSIONS--
+dom
+--FILE--
+newInstanceWithoutConstructor();
+ } catch (ReflectionException $e) {
+ echo $e->getMessage(), "\n";
+ }
+}
+
+?>
+--EXPECT--
+Class DOM\HTMLDocument is an internal class marked as final that cannot be instantiated without invoking its constructor
+Class DOM\XMLDocument is an internal class marked as final that cannot be instantiated without invoking its constructor
diff --git a/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_ns_imported_02.phpt b/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_ns_imported_02.phpt
index ad9e6446719e0..6c3d01424dee1 100644
--- a/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_ns_imported_02.phpt
+++ b/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_ns_imported_02.phpt
@@ -5,7 +5,7 @@ dom
--FILE--
');
+$xml = DOM\XMLDocument::createFromFile(__DIR__.'/sample.xml');
$xml->documentElement->appendChild($xml->createElementNS('some:ns2', 'child'));
echo $xml->saveXML();
@@ -22,8 +22,38 @@ echo $html->saveHTML(), "\n";
?>
--EXPECT--
-
+
+
+
+
+
+
+
+
+
+
+
--- After import into HTML ---
- foo
-foo
+ foo
+
+
+
+
+
+
+
+
+
+
+foo
+
+
+
+
+
+
+
+
+
+
diff --git a/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_ns_imported_03.phpt b/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_ns_imported_03.phpt
index 4669d2b391caf..f6b88496765aa 100644
--- a/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_ns_imported_03.phpt
+++ b/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_ns_imported_03.phpt
@@ -5,7 +5,7 @@ dom
--FILE--
');
+$xml = DOM\XMLDocument::createFromFile(__DIR__.'/sample.xml');
$xml->documentElement->appendChild($xml->createElementNS('some:ns2', 'child'));
echo $xml->saveXML();
@@ -22,7 +22,17 @@ echo $html->saveHTML(), "\n";
?>
--EXPECT--
-
+
+
+
+
+
+
+
+
+
+
+
--- After import into HTML ---
foo
diff --git a/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_ns_imported_04.phpt b/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_ns_imported_04.phpt
index a5014119c3c33..7ecc1133f509e 100644
--- a/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_ns_imported_04.phpt
+++ b/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_ns_imported_04.phpt
@@ -5,7 +5,7 @@ dom
--FILE--
');
+$xml = DOM\XMLDocument::createFromFile(__DIR__.'/sample.xml');
$xml->documentElement->appendChild($xml->createElementNS('some:ns2', 'child'));
echo $xml->saveXML();
@@ -22,7 +22,17 @@ echo $html->saveHTML(), "\n";
?>
--EXPECT--
-
+
+
+
+
+
+
+
+
+
+
+
--- After import into HTML ---
foo
diff --git a/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_ns_imported_05.phpt b/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_ns_imported_05.phpt
index f0a9a594fde45..588819a9b6c2f 100644
--- a/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_ns_imported_05.phpt
+++ b/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_ns_imported_05.phpt
@@ -5,7 +5,7 @@ dom
--FILE--
');
+$xml = DOM\XMLDocument::createFromFile(__DIR__.'/sample.xml');
$xml->documentElement->appendChild($xml->createElementNS('some:ns2', 'child'));
echo $xml->saveXML();
@@ -22,8 +22,38 @@ echo $html->saveHTML(), "\n";
?>
--EXPECT--
-
+
+
+
+
+
+
+
+
+
+
+
--- After adoption into HTML ---
-foo
-foo
+foo
+
+
+
+
+
+
+
+
+
+
+foo
+
+
+
+
+
+
+
+
+
+
diff --git a/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_ns_imported_06.phpt b/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_ns_imported_06.phpt
index 0f7a70f607f51..8de70bd82fc72 100644
--- a/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_ns_imported_06.phpt
+++ b/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_ns_imported_06.phpt
@@ -5,7 +5,7 @@ dom
--FILE--
');
+$xml = DOM\XMLDocument::createFromFile(__DIR__.'/sample.xml');
$xml->documentElement->firstChild->appendChild($xml->createElementNS('some:ns2', 'child'));
echo $xml->saveXML();
@@ -14,7 +14,7 @@ echo "--- After clone + import into HTML ---\n";
$html = DOM\HTMLDocument::createFromString('foo
', LIBXML_NOERROR);
$p = $html->documentElement->firstChild->nextSibling->firstChild;
-$p->appendChild($html->adoptNode($xml->documentElement->firstChild->cloneNode(true)));
+$p->appendChild($html->adoptNode($xml->documentElement->firstElementChild->cloneNode(true)));
echo $html->saveXML();
echo $html->saveHTML(), "\n";
@@ -22,8 +22,34 @@ echo $html->saveHTML(), "\n";
?>
--EXPECT--
-
+
+
+
+
+
+
+
+
+
+
+
--- After clone + import into HTML ---
-foo
-foo
+foo
+
+
+
+
+
+
+
+
+foo
+
+
+
+
+
+
+
+
diff --git a/ext/dom/tests/modern/html/serializer/sample.xml b/ext/dom/tests/modern/html/serializer/sample.xml
new file mode 100644
index 0000000000000..e1c3c58a829b4
--- /dev/null
+++ b/ext/dom/tests/modern/html/serializer/sample.xml
@@ -0,0 +1,12 @@
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
From 9fb0b1256df327a4c26ec8aa88fca0700f2089e6 Mon Sep 17 00:00:00 2001
From: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
Date: Fri, 20 Oct 2023 13:40:42 +0200
Subject: [PATCH 28/53] Comment and indent cleanup
---
ext/dom/html5_serializer.c | 2 +-
ext/dom/html_document.c | 7 ++++---
ext/libxml/mime_sniff.c | 12 ++++++------
3 files changed, 11 insertions(+), 10 deletions(-)
diff --git a/ext/dom/html5_serializer.c b/ext/dom/html5_serializer.c
index daa2e0ce2ec0a..8aaf4bd3cc76a 100644
--- a/ext/dom/html5_serializer.c
+++ b/ext/dom/html5_serializer.c
@@ -329,7 +329,7 @@ static zend_result dom_html5_serialize_node(dom_html5_serialize_context *ctx, co
return SUCCESS;
}
-/* https://html.spec.whatwg.org/multipage/parsing.html#serialising-html-fragments
+/* https://html.spec.whatwg.org/multipage/parsing.html#serialising-html-fragments (Date 2023-10-18)
* Note: this serializes the _children_, excluding the node itself! */
zend_result dom_html5_serialize(dom_html5_serialize_context *ctx, const xmlNode *node)
{
diff --git a/ext/dom/html_document.c b/ext/dom/html_document.c
index 968685ee57744..37d7987b76b55 100644
--- a/ext/dom/html_document.c
+++ b/ext/dom/html_document.c
@@ -538,16 +538,16 @@ PHP_METHOD(DOM_HTMLDocument, createEmpty)
}
#ifdef LIBXML_HTML_ENABLED
- xmlDocPtr lxml_doc = htmlNewDocNoDtD(NULL, NULL);
+ xmlDocPtr lxml_doc = htmlNewDocNoDtD(NULL, NULL);
if (UNEXPECTED(lxml_doc == NULL)) {
goto oom;
}
#else
- xmlDocPtr lxml_doc = xmlNewDoc((const xmlChar *) "1.0");
+ xmlDocPtr lxml_doc = xmlNewDoc((const xmlChar *) "1.0");
if (UNEXPECTED(lxml_doc == NULL)) {
goto oom;
}
- lxml_doc->type = XML_HTML_DOCUMENT_NODE;
+ lxml_doc->type = XML_HTML_DOCUMENT_NODE;
#endif
lxml_doc->encoding = xmlStrdup((const xmlChar *) encoding);
@@ -1008,6 +1008,7 @@ PHP_METHOD(DOM_HTMLDocument, saveHTML)
PHP_METHOD(DOM_HTMLDocument, __construct)
{
+ /* Private constructor cannot be called. */
ZEND_UNREACHABLE();
}
diff --git a/ext/libxml/mime_sniff.c b/ext/libxml/mime_sniff.c
index 634c7fba621f7..5692f70ee251e 100644
--- a/ext/libxml/mime_sniff.c
+++ b/ext/libxml/mime_sniff.c
@@ -269,27 +269,27 @@ PHP_LIBXML_API zend_string *php_libxml_sniff_charset_from_string(const char *sta
/* 11.8.2. Collect a sequence of code points that are not ';' */
start += collect_a_sequence_of_code_points(start, end, is_not_semicolon);
} else {
- /* 9. Otherwise */
- /* 9.1. Set parameterValue to the result of collecting a sequence of code points that are not ';' */
+ /* 11.9. Otherwise */
+ /* 11.9.1. Set parameterValue to the result of collecting a sequence of code points that are not ';' */
size_t parameter_value_length = collect_a_sequence_of_code_points(start, end, is_not_semicolon);
parameter_value = zend_string_init(start, parameter_value_length, false);
start += parameter_name_length;
- /* 9.2. Remove trailing HTTP whitespace from parameterValue */
+ /* 11.9.2. Remove trailing HTTP whitespace from parameterValue */
while (ZSTR_LEN(parameter_value) > 0 && is_http_whitespace(ZSTR_VAL(parameter_value)[ZSTR_LEN(parameter_value) - 1])) {
ZSTR_LEN(parameter_value)--;
}
ZSTR_VAL(parameter_value)[ZSTR_LEN(parameter_value)] = '\0';
- /* 9.3. Continue if parameterValue is empty */
+ /* 11.9.3. Continue if parameterValue is empty */
if (ZSTR_LEN(parameter_value) == 0) {
zend_string_release_ex(parameter_value, false);
continue;
}
}
- /* 10. We diverge from the spec here: we're only interested in charset.
- * Furthermore, as only the first match matters, we can stop immediately with the loop once we set the charset. */
+ /* 11.10. We diverge from the spec here: we're only interested in charset.
+ * Furthermore, as only the first match matters, we can stop immediately with the loop once we set the charset. */
if (parameter_name_length == strlen("charset")
&& strncasecmp(parameter_name, "charset", strlen("charset")) == 0 /* Because of lowercasing in step 11.4 */
&& solely_contains_http_quoted_string_tokens(ZSTR_VAL(parameter_value), ZSTR_LEN(parameter_value))) {
From 871ebbce46fd4a69300196afe850307369f89549 Mon Sep 17 00:00:00 2001
From: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
Date: Fri, 20 Oct 2023 13:47:36 +0200
Subject: [PATCH 29/53] Use libxml context for saveHTMLFile
---
ext/dom/html_document.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/ext/dom/html_document.c b/ext/dom/html_document.c
index 37d7987b76b55..5add1445385b9 100644
--- a/ext/dom/html_document.c
+++ b/ext/dom/html_document.c
@@ -951,7 +951,7 @@ PHP_METHOD(DOM_HTMLDocument, saveHTMLFile)
RETURN_THROWS();
}
- php_stream *stream = php_stream_open_wrapper_ex(file, "wb", REPORT_ERRORS, /* opened_path */ NULL, /* context */ NULL);
+ php_stream *stream = php_stream_open_wrapper_ex(file, "wb", REPORT_ERRORS, /* opened_path */ NULL, /* context */ php_libxml_get_stream_context());
if (!stream) {
RETURN_FALSE;
}
From 4651d96198e328d3baa4218cff7318266409b0ff Mon Sep 17 00:00:00 2001
From: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
Date: Fri, 20 Oct 2023 14:03:50 +0200
Subject: [PATCH 30/53] [ci skip] UPGRADING
---
UPGRADING | 8 ++++++++
UPGRADING.INTERNALS | 4 ++++
2 files changed, 12 insertions(+)
diff --git a/UPGRADING b/UPGRADING
index 0e7f671acb943..a43cbf1fcacf0 100644
--- a/UPGRADING
+++ b/UPGRADING
@@ -80,6 +80,14 @@ PHP 8.4 UPGRADE NOTES
. Added constant DOMNode::DOCUMENT_POSITION_CONTAINS.
. Added constant DOMNode::DOCUMENT_POSITION_CONTAINED_BY.
. Added constant DOMNode::DOCUMENT_POSITION_IMPLEMENTATION_SPECIFIC.
+ . Implemented DOM HTML5 parsing and serialization.
+ RFC: https://wiki.php.net/rfc/domdocument_html5_parser.
+ This RFC adds the new DOM namespace along with class and constant aliases.
+ There are two new classes to handle HTML and XML documents:
+ DOM\HTMLDocument and DOM\XMLDocument.
+ These classes provide a cleaner API to handle HTML and XML documents.
+ Furthermore, the DOM\HTMLDocument class implements spec-compliant HTML5
+ parsing and serialization.
- Phar:
. Added support for the unix timestamp extension for zip archives.
diff --git a/UPGRADING.INTERNALS b/UPGRADING.INTERNALS
index dbdda5dd221c4..f3c394bf3224c 100644
--- a/UPGRADING.INTERNALS
+++ b/UPGRADING.INTERNALS
@@ -52,6 +52,10 @@ PHP 8.4 INTERNALS UPGRADE NOTES
- The function php_xsl_create_object() was removed as it was not used
nor exported.
+ d. ext/libxml
+ - Added php_libxml_pretend_ctx_error_ex() to emit errors as if they had come
+ from libxml.
+
========================
4. OpCode changes
========================
From a30e5a6a5cc9e5e049353f3f858a7f4bf3617466 Mon Sep 17 00:00:00 2001
From: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
Date: Fri, 20 Oct 2023 14:20:51 +0200
Subject: [PATCH 31/53] Propagate last error back into libxml
---
...fromString_parser_warning_internal_error.phpt | 16 ++++++++++++++++
ext/libxml/libxml.c | 9 +++++++++
2 files changed, 25 insertions(+)
diff --git a/ext/dom/tests/modern/html/parser/HTMLDocument_fromString_parser_warning_internal_error.phpt b/ext/dom/tests/modern/html/parser/HTMLDocument_fromString_parser_warning_internal_error.phpt
index 43b8cc4905aae..5d8ab5704e594 100644
--- a/ext/dom/tests/modern/html/parser/HTMLDocument_fromString_parser_warning_internal_error.phpt
+++ b/ext/dom/tests/modern/html/parser/HTMLDocument_fromString_parser_warning_internal_error.phpt
@@ -13,6 +13,8 @@ foreach (libxml_get_errors() as $error) {
var_dump($error->message, $error->line, $error->column);
}
+var_dump(libxml_get_last_error());
+
?>
--EXPECT--
string(81) "tokenizer error invalid-first-character-of-tag-name in Entity, line: 1, column: 2"
@@ -27,3 +29,17 @@ int(1)
string(71) "tree error doctype-token-in-body-mode in Entity, line: 1, column: 10-16"
int(1)
int(10)
+object(LibXMLError)#4 (6) {
+ ["level"]=>
+ int(2)
+ ["code"]=>
+ int(1)
+ ["column"]=>
+ int(10)
+ ["message"]=>
+ string(71) "tree error doctype-token-in-body-mode in Entity, line: 1, column: 10-16"
+ ["file"]=>
+ string(0) ""
+ ["line"]=>
+ int(1)
+}
diff --git a/ext/libxml/libxml.c b/ext/libxml/libxml.c
index 72e020707abcc..15d6f4074f0ec 100644
--- a/ext/libxml/libxml.c
+++ b/ext/libxml/libxml.c
@@ -35,6 +35,7 @@
#include
#include
#include
+#include
#ifdef LIBXML_SCHEMAS_ENABLED
#include
#include
@@ -815,6 +816,14 @@ PHP_LIBXML_API void php_libxml_pretend_ctx_error_ex(int line, int column, const
va_start(args, msg);
php_libxml_internal_error_handler_ex(PHP_LIBXML_CTX_ERROR, NULL, &msg, args, line, column);
va_end(args);
+
+ /* Propagate back into libxml */
+ if (LIBXML(error_list)) {
+ xmlErrorPtr last = zend_llist_get_last(LIBXML(error_list));
+ if (last) {
+ xmlCopyError(last, &xmlLastError);
+ }
+ }
}
PHP_LIBXML_API void php_libxml_ctx_error(void *ctx, const char *msg, ...)
From d161e541c8d27e838015d35f2396f5c79c19a7da Mon Sep 17 00:00:00 2001
From: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
Date: Fri, 20 Oct 2023 14:28:44 +0200
Subject: [PATCH 32/53] Propagate file name in libxml error
---
ext/dom/html_document.c | 6 ++--
..._parser_warning_libxml_get_last_error.phpt | 30 +++++++++++++++++++
...mString_parser_warning_internal_error.phpt | 2 +-
ext/libxml/libxml.c | 5 +++-
ext/libxml/php_libxml.h | 2 +-
5 files changed, 39 insertions(+), 6 deletions(-)
create mode 100644 ext/dom/tests/modern/html/parser/HTMLDocument_fromFile_parser_warning_libxml_get_last_error.phpt
diff --git a/ext/dom/html_document.c b/ext/dom/html_document.c
index 5add1445385b9..58569a769cbe2 100644
--- a/ext/dom/html_document.c
+++ b/ext/dom/html_document.c
@@ -247,7 +247,7 @@ static void dom_lexbor_libxml2_bridge_tokenizer_error_reporter(void *application
{
dom_lexbor_libxml2_bridge_application_data *application_data = application_data_voidptr;
dom_find_line_and_column_using_cache(application_data, &application_data->cache_tokenizer, offset);
- php_libxml_pretend_ctx_error_ex(application_data->cache_tokenizer.last_line, application_data->cache_tokenizer.last_column, "tokenizer error %s in %s, line: %zu, column: %zu\n", dom_lexbor_tokenizer_error_code_to_string(error->id), application_data->input_name, application_data->cache_tokenizer.last_line, application_data->cache_tokenizer.last_column);
+ php_libxml_pretend_ctx_error_ex(application_data->input_name, application_data->cache_tokenizer.last_line, application_data->cache_tokenizer.last_column, "tokenizer error %s in %s, line: %zu, column: %zu\n", dom_lexbor_tokenizer_error_code_to_string(error->id), application_data->input_name, application_data->cache_tokenizer.last_line, application_data->cache_tokenizer.last_column);
}
static void dom_lexbor_libxml2_bridge_tree_error_reporter(void *application_data_voidptr, lxb_html_tree_error_t *error, size_t line, size_t column, size_t len)
@@ -261,9 +261,9 @@ static void dom_lexbor_libxml2_bridge_tree_error_reporter(void *application_data
if (UNEXPECTED(len <= 1)) {
/* Possible with EOF, or single-character tokens, don't use a range in the error display in this case */
- php_libxml_pretend_ctx_error_ex(line, column, "tree error %s in %s, line: %zu, column: %zu\n", dom_lexbor_tree_error_code_to_string(error->id), application_data->input_name, line, column);
+ php_libxml_pretend_ctx_error_ex(application_data->input_name, line, column, "tree error %s in %s, line: %zu, column: %zu\n", dom_lexbor_tree_error_code_to_string(error->id), application_data->input_name, line, column);
} else {
- php_libxml_pretend_ctx_error_ex(line, column, "tree error %s in %s, line: %zu, column: %zu-%zu\n", dom_lexbor_tree_error_code_to_string(error->id), application_data->input_name, line, column, column + len - 1);
+ php_libxml_pretend_ctx_error_ex(application_data->input_name, line, column, "tree error %s in %s, line: %zu, column: %zu-%zu\n", dom_lexbor_tree_error_code_to_string(error->id), application_data->input_name, line, column, column + len - 1);
}
}
diff --git a/ext/dom/tests/modern/html/parser/HTMLDocument_fromFile_parser_warning_libxml_get_last_error.phpt b/ext/dom/tests/modern/html/parser/HTMLDocument_fromFile_parser_warning_libxml_get_last_error.phpt
new file mode 100644
index 0000000000000..3de37d94890ba
--- /dev/null
+++ b/ext/dom/tests/modern/html/parser/HTMLDocument_fromFile_parser_warning_libxml_get_last_error.phpt
@@ -0,0 +1,30 @@
+--TEST--
+DOM\HTMLDocument::createFromFile() - parser warning libxml_get_last_error()
+--EXTENSIONS--
+dom
+--FILE--
+x> ';
+$dom = DOM\HTMLDocument::createFromFile(__DIR__."/parser_warning_01.html");
+
+var_dump(libxml_get_last_error());
+
+?>
+--EXPECTF--
+object(LibXMLError)#2 (6) {
+ ["level"]=>
+ int(2)
+ ["code"]=>
+ int(1)
+ ["column"]=>
+ int(2)
+ ["message"]=>
+ string(%d) "tree error unexpected-token-in-initial-mode in %sparser_warning_01.html, line: 1, column: 2-6"
+ ["file"]=>
+ string(%d) "%sparser_warning_01.html"
+ ["line"]=>
+ int(1)
+}
diff --git a/ext/dom/tests/modern/html/parser/HTMLDocument_fromString_parser_warning_internal_error.phpt b/ext/dom/tests/modern/html/parser/HTMLDocument_fromString_parser_warning_internal_error.phpt
index 5d8ab5704e594..44dac9eb53923 100644
--- a/ext/dom/tests/modern/html/parser/HTMLDocument_fromString_parser_warning_internal_error.phpt
+++ b/ext/dom/tests/modern/html/parser/HTMLDocument_fromString_parser_warning_internal_error.phpt
@@ -39,7 +39,7 @@ object(LibXMLError)#4 (6) {
["message"]=>
string(71) "tree error doctype-token-in-body-mode in Entity, line: 1, column: 10-16"
["file"]=>
- string(0) ""
+ string(6) "Entity"
["line"]=>
int(1)
}
diff --git a/ext/libxml/libxml.c b/ext/libxml/libxml.c
index 15d6f4074f0ec..b3798107abd99 100644
--- a/ext/libxml/libxml.c
+++ b/ext/libxml/libxml.c
@@ -810,7 +810,7 @@ static xmlParserInputPtr _php_libxml_pre_ext_ent_loader(const char *URL,
}
}
-PHP_LIBXML_API void php_libxml_pretend_ctx_error_ex(int line, int column, const char *msg,...)
+PHP_LIBXML_API void php_libxml_pretend_ctx_error_ex(const char *file, int line, int column, const char *msg,...)
{
va_list args;
va_start(args, msg);
@@ -821,6 +821,9 @@ PHP_LIBXML_API void php_libxml_pretend_ctx_error_ex(int line, int column, const
if (LIBXML(error_list)) {
xmlErrorPtr last = zend_llist_get_last(LIBXML(error_list));
if (last) {
+ if (!last->file) {
+ last->file = strdup(file);
+ }
xmlCopyError(last, &xmlLastError);
}
}
diff --git a/ext/libxml/php_libxml.h b/ext/libxml/php_libxml.h
index 3e0b6f7dcf563..2ffbc5b24930c 100644
--- a/ext/libxml/php_libxml.h
+++ b/ext/libxml/php_libxml.h
@@ -132,7 +132,7 @@ PHP_LIBXML_API void php_libxml_node_free_resource(xmlNodePtr node);
PHP_LIBXML_API void php_libxml_node_decrement_resource(php_libxml_node_object *object);
PHP_LIBXML_API void php_libxml_error_handler(void *ctx, const char *msg, ...);
PHP_LIBXML_API void php_libxml_ctx_warning(void *ctx, const char *msg, ...);
-PHP_LIBXML_API void php_libxml_pretend_ctx_error_ex(int line, int column, const char *msg,...);
+PHP_LIBXML_API void php_libxml_pretend_ctx_error_ex(const char *file, int line, int column, const char *msg,...);
PHP_LIBXML_API void php_libxml_ctx_error(void *ctx, const char *msg, ...);
PHP_LIBXML_API int php_libxml_xmlCheckUTF8(const unsigned char *s);
PHP_LIBXML_API void php_libxml_switch_context(zval *context, zval *oldcontext);
From 23612b0e81a0471b7d974676170157d9353aa34b Mon Sep 17 00:00:00 2001
From: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
Date: Sun, 22 Oct 2023 17:33:13 +0200
Subject: [PATCH 33/53] Update doctype hint in ext/xsl
---
ext/xsl/php_xsl.stub.php | 8 ++++----
ext/xsl/php_xsl_arginfo.h | 2 +-
2 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/ext/xsl/php_xsl.stub.php b/ext/xsl/php_xsl.stub.php
index 55ad36150bf67..9394562425d78 100644
--- a/ext/xsl/php_xsl.stub.php
+++ b/ext/xsl/php_xsl.stub.php
@@ -76,25 +76,25 @@ class XSLTProcessor
public bool $cloneDocument = false;
/**
- * @param DOMDocument|SimpleXMLElement $stylesheet
+ * @param DOM\Document|SimpleXMLElement $stylesheet
* @tentative-return-type
*/
public function importStylesheet(object $stylesheet): bool {}
/**
- * @param DOMDocument|SimpleXMLElement $document
+ * @param DOM\Document|SimpleXMLElement $document
* @tentative-return-type
*/
public function transformToDoc(object $document, ?string $returnClass = null): object|false {}
/**
- * @param DOMDocument|SimpleXMLElement $document
+ * @param DOM\Document|SimpleXMLElement $document
* @tentative-return-type
*/
public function transformToUri(object $document, string $uri): int {}
/**
- * @param DOMDocument|SimpleXMLElement $document
+ * @param DOM\Document|SimpleXMLElement $document
* @tentative-return-type
*/
public function transformToXml(object $document): string|null|false {}
diff --git a/ext/xsl/php_xsl_arginfo.h b/ext/xsl/php_xsl_arginfo.h
index d1c23d4bfe6f1..92a7ab61e7810 100644
--- a/ext/xsl/php_xsl_arginfo.h
+++ b/ext/xsl/php_xsl_arginfo.h
@@ -1,5 +1,5 @@
/* This is a generated file, edit the .stub.php file instead.
- * Stub hash: 5518a63a4adec49c81e650d620ce2dbce41d8d65 */
+ * Stub hash: 87ea452722956b6cfe46458e7fcd97f0bcfb767b */
ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_TYPE_INFO_EX(arginfo_class_XSLTProcessor_importStylesheet, 0, 1, _IS_BOOL, 0)
ZEND_ARG_TYPE_INFO(0, stylesheet, IS_OBJECT, 0)
From 141023845cb24cfcadb69d0d33a91b722ca5c782 Mon Sep 17 00:00:00 2001
From: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
Date: Sun, 22 Oct 2023 17:34:09 +0200
Subject: [PATCH 34/53] Update error message wording of abstract class
---
ext/dom/document.c | 2 +-
.../html/interactions/HTMLDocument_registerNodeClass_02.phpt | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/ext/dom/document.c b/ext/dom/document.c
index ba696eb2f5758..ae38ded819bb8 100644
--- a/ext/dom/document.c
+++ b/ext/dom/document.c
@@ -2074,7 +2074,7 @@ PHP_METHOD(DOM_Document, registerNodeClass)
}
if (basece->ce_flags & ZEND_ACC_ABSTRACT) {
- zend_argument_value_error(1, "must be a non-abstract class");
+ zend_argument_value_error(1, "must not be an abstract class");
RETURN_THROWS();
}
diff --git a/ext/dom/tests/modern/html/interactions/HTMLDocument_registerNodeClass_02.phpt b/ext/dom/tests/modern/html/interactions/HTMLDocument_registerNodeClass_02.phpt
index 774dceaa59780..f9b8493cda34a 100644
--- a/ext/dom/tests/modern/html/interactions/HTMLDocument_registerNodeClass_02.phpt
+++ b/ext/dom/tests/modern/html/interactions/HTMLDocument_registerNodeClass_02.phpt
@@ -27,7 +27,7 @@ $element->ownerDocument->foo();
?>
--EXPECTF--
-DOM\Document::registerNodeClass(): Argument #1 ($baseClass) must be a non-abstract class
+DOM\Document::registerNodeClass(): Argument #1 ($baseClass) must not be an abstract class
string(11) "DOMDocument"
Fatal error: Uncaught Error: Call to undefined method DOMDocument::foo() in %s:%d
From 44b966311b6bc0e94f7eb8cdc0ab74ca230f515d Mon Sep 17 00:00:00 2001
From: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
Date: Sun, 22 Oct 2023 17:43:22 +0200
Subject: [PATCH 35/53] Add test for incompatible override_encoding and charset
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Co-authored-by: Tim Düsterhus
---
...verride_encoding_incompatible_charset.phpt | 36 +++++++++++++++++++
1 file changed, 36 insertions(+)
create mode 100644 ext/dom/tests/modern/html/encoding/HTMLDocument_override_encoding_incompatible_charset.phpt
diff --git a/ext/dom/tests/modern/html/encoding/HTMLDocument_override_encoding_incompatible_charset.phpt b/ext/dom/tests/modern/html/encoding/HTMLDocument_override_encoding_incompatible_charset.phpt
new file mode 100644
index 0000000000000..63a1fe0040f95
--- /dev/null
+++ b/ext/dom/tests/modern/html/encoding/HTMLDocument_override_encoding_incompatible_charset.phpt
@@ -0,0 +1,36 @@
+--TEST--
+DOM\HTMLDocument: override_encoding with incompatible charset
+--EXTENSIONS--
+iconv
+dom
+--FILE--
+
+
+
+
+
+
+
+
+
+
+ DOC,
+ ),
+ override_encoding: 'utf-8'
+);
+
+var_dump(iconv('UTF-8', 'ISO-8859-1', $doc->getElementsByTagName('title')->item(0)->textContent));
+var_dump(iconv('UTF-8', 'ISO-8859-1', $doc->getElementsByTagName('body')->item(0)->textContent));
+?>
+--EXPECT--
+string(3) ""
+string(9) "
+
+
+"
From 0b10fdc02a408e0f81669ba6e1d1efe3f8307179 Mon Sep 17 00:00:00 2001
From: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
Date: Sun, 22 Oct 2023 20:35:12 +0200
Subject: [PATCH 36/53] Test behaviour of XML-style namespaces in HTMLDocument
---
.../html/parser/xml_style_namespace.phpt | 57 +++++++++++++++++++
1 file changed, 57 insertions(+)
create mode 100644 ext/dom/tests/modern/html/parser/xml_style_namespace.phpt
diff --git a/ext/dom/tests/modern/html/parser/xml_style_namespace.phpt b/ext/dom/tests/modern/html/parser/xml_style_namespace.phpt
new file mode 100644
index 0000000000000..08c2756f2cdf3
--- /dev/null
+++ b/ext/dom/tests/modern/html/parser/xml_style_namespace.phpt
@@ -0,0 +1,57 @@
+--TEST--
+HTMLDocument: XML-style namespace
+--EXTENSIONS--
+dom
+--FILE--
+
+
+
+ Test
+
+
+
+
+
+HTML);
+
+echo "--- Namespaces ---\n";
+$xpath = new DOMXPath($dom);
+foreach ($xpath->query("//*[name()='body']//*") as $node) {
+ echo $node->nodeName, " ", $node->namespaceURI ?? "(NONE)", "\n";
+ echo "prefix: \"", $node->prefix, "\"\n";
+ foreach ($node->attributes as $attribute) {
+ echo " Attribute: ", $attribute->nodeName, " ", $attribute->namespaceURI ?? "(NONE)", "\n";
+ }
+}
+
+echo "--- HTML serialization ---\n";
+echo $dom->saveHTML(), "\n";
+echo "--- XML serialization ---\n";
+echo $dom->saveXML();
+
+?>
+--EXPECT--
+--- Namespaces ---
+foo:bar http://www.w3.org/1999/xhtml
+prefix: ""
+--- HTML serialization ---
+
+ Test
+
+
+
+
+
+--- XML serialization ---
+
+
+
+ Test
+
+
+
+
+
From 965732d78b13101b573b6dc02ab7738c6e416b9a Mon Sep 17 00:00:00 2001
From: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
Date: Sun, 22 Oct 2023 20:45:36 +0200
Subject: [PATCH 37/53] Process review feedback
---
ext/dom/html5_parser.c | 2 ++
ext/dom/html5_serializer.c | 4 +++-
ext/dom/html_document.c | 16 ++++++++--------
ext/dom/php_dom.stub.php | 2 +-
ext/dom/php_dom_arginfo.h | 2 +-
ext/dom/xml_document.c | 8 ++++----
6 files changed, 19 insertions(+), 15 deletions(-)
diff --git a/ext/dom/html5_parser.c b/ext/dom/html5_parser.c
index bddccd17b153b..095adb8d2dbcd 100644
--- a/ext/dom/html5_parser.c
+++ b/ext/dom/html5_parser.c
@@ -206,6 +206,8 @@ lexbor_libxml2_bridge_status lexbor_libxml2_bridge_convert_document(lxb_html_doc
#ifdef LIBXML_HTML_ENABLED
xmlDocPtr lxml_doc = htmlNewDocNoDtD(NULL, NULL);
#else
+ /* If HTML support is not enabled, then htmlNewDocNoDtD() is not available.
+ * This code mimics the behaviour. */
xmlDocPtr lxml_doc = xmlNewDoc((const xmlChar *) "1.0");
lxml_doc->type = XML_HTML_DOCUMENT_NODE;
#endif
diff --git a/ext/dom/html5_serializer.c b/ext/dom/html5_serializer.c
index 8aaf4bd3cc76a..af6f5551effca 100644
--- a/ext/dom/html5_serializer.c
+++ b/ext/dom/html5_serializer.c
@@ -131,7 +131,9 @@ static zend_result dom_html5_serialize_text_node(dom_html5_serialize_context *ct
if (node->parent->type == XML_ELEMENT_NODE && dom_is_html_ns(node->parent)) {
const xmlNode *parent = node->parent;
size_t name_length = strlen((const char *) parent->name);
- /* Note: <noscript> is not handled because scripting is not enabled because the user agent (PHP) does not support (JS) scripting */
+ /* Spec tells us to only emit noscript content as-is if scripting is enabled.
+ * However, the user agent (PHP) does not support (JS) scripting.
+ * Furthermore, if the output is actually consumed by a browser, we should err on the safe side and not emit the content as-is. */
if (dom_local_name_compare_ex(parent, "style", strlen("style"), name_length)
|| dom_local_name_compare_ex(parent, "script", strlen("script"), name_length)
|| dom_local_name_compare_ex(parent, "xmp", strlen("xmp"), name_length)
diff --git a/ext/dom/html_document.c b/ext/dom/html_document.c
index 58569a769cbe2..536383380d6b8 100644
--- a/ext/dom/html_document.c
+++ b/ext/dom/html_document.c
@@ -68,7 +68,7 @@ typedef struct {
typedef struct {
/* We can skip some conversion if the input and output encoding are both UTF-8, we only have to validate and substitute replacement characters */
- bool fast_path; /* Put first, close to the encode & decode structures, for cache locality */
+ bool fast_path; /* Put first, near the encode & decode structures, for cache locality */
lxb_encoding_encode_t encode;
lxb_encoding_decode_t decode;
const lxb_encoding_data_t *encode_data;
@@ -215,7 +215,7 @@ static void dom_find_line_and_column_using_cache(const dom_lexbor_libxml2_bridge
/* Either unicode or UTF-8 data */
if (application_data->current_input_codepoints != NULL) {
while (cache->last_offset < offset) {
- if (application_data->current_input_codepoints[cache->last_offset] == 0x000A) {
+ if (application_data->current_input_codepoints[cache->last_offset] == 0x000A /* Unicode codepoint for line feed */) {
cache->last_line++;
cache->last_column = 1;
} else {
@@ -512,11 +512,11 @@ static bool dom_parse_decode_encode_finish(lexbor_libxml2_bridge_parse_context *
return true;
}
-static bool check_options_validity(zend_long options)
+static bool check_options_validity(uint32_t arg_num, zend_long options)
{
const zend_long VALID_OPTIONS = XML_PARSE_NOERROR | XML_PARSE_COMPACT | HTML_PARSE_NOIMPLIED | DOM_HTML_NO_DEFAULT_NS;
if ((options & ~VALID_OPTIONS) != 0) {
- zend_argument_value_error(2, "contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\\NO_DEFAULT_NS)");
+ zend_argument_value_error(arg_num, "contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\\NO_DEFAULT_NS)");
return false;
}
return true;
@@ -526,7 +526,7 @@ PHP_METHOD(DOM_HTMLDocument, createEmpty)
{
const char *encoding = "UTF-8";
size_t encoding_len = strlen("UTF-8");
- if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s", &encoding, &encoding_len) == FAILURE) {
+ if (zend_parse_parameters(ZEND_NUM_ARGS(), "|p", &encoding, &encoding_len) == FAILURE) {
RETURN_THROWS();
}
@@ -570,7 +570,7 @@ PHP_METHOD(DOM_HTMLDocument, createFromString)
RETURN_THROWS();
}
- if (!check_options_validity(options)) {
+ if (!check_options_validity(2, options)) {
RETURN_THROWS();
}
@@ -673,7 +673,7 @@ PHP_METHOD(DOM_HTMLDocument, createFromFile)
size_t filename_len, override_encoding_len;
zend_long options = 0;
php_stream *stream = NULL;
- if (zend_parse_parameters(ZEND_NUM_ARGS(), "p|ls!", &filename, &filename_len, &options, &override_encoding, &override_encoding_len) == FAILURE) {
+ if (zend_parse_parameters(ZEND_NUM_ARGS(), "p|lp!", &filename, &filename_len, &options, &override_encoding, &override_encoding_len) == FAILURE) {
RETURN_THROWS();
}
@@ -683,7 +683,7 @@ PHP_METHOD(DOM_HTMLDocument, createFromFile)
RETURN_THROWS();
}
- if (!check_options_validity(options)) {
+ if (!check_options_validity(2, options)) {
RETURN_THROWS();
}
diff --git a/ext/dom/php_dom.stub.php b/ext/dom/php_dom.stub.php
index 927a392e374d1..72c55d3ab35ed 100644
--- a/ext/dom/php_dom.stub.php
+++ b/ext/dom/php_dom.stub.php
@@ -1061,7 +1061,7 @@ public function createAttribute(string $localName) {}
/** @return Attr|false */
public function createAttributeNS(?string $namespace, string $qualifiedName) {}
- /** @return CDataSection|false */
+ /** @return CDATASection|false */
public function createCDATASection(string $data) {}
/** @tentative-return-type */
diff --git a/ext/dom/php_dom_arginfo.h b/ext/dom/php_dom_arginfo.h
index 177f86510dcbd..a2b8562bd8f9e 100644
--- a/ext/dom/php_dom_arginfo.h
+++ b/ext/dom/php_dom_arginfo.h
@@ -1,5 +1,5 @@
/* This is a generated file, edit the .stub.php file instead.
- * Stub hash: bc74a857ba008dd3ba6b86256c154050b7c07552 */
+ * Stub hash: 512273df3d4b8f601ffb0d78d6b4ec289fb68d6d */
ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(arginfo_dom_import_simplexml, 0, 1, DOMElement, 0)
ZEND_ARG_TYPE_INFO(0, node, IS_OBJECT, 0)
diff --git a/ext/dom/xml_document.c b/ext/dom/xml_document.c
index 9706de5a847fd..78ebb5d6c503f 100644
--- a/ext/dom/xml_document.c
+++ b/ext/dom/xml_document.c
@@ -23,7 +23,7 @@
#include "php_dom.h"
#include "namespace_compat.h"
-static bool check_options_validity(zend_long options)
+static bool check_options_validity(uint32_t arg_num, zend_long options)
{
const zend_long VALID_OPTIONS = XML_PARSE_NOENT | XML_PARSE_DTDLOAD | XML_PARSE_DTDATTR | XML_PARSE_DTDVALID | XML_PARSE_NOERROR | XML_PARSE_NOWARNING | XML_PARSE_NOBLANKS | XML_PARSE_XINCLUDE | XML_PARSE_NSCLEAN | XML_PARSE_NOCDATA | XML_PARSE_NONET | XML_PARSE_PEDANTIC | XML_PARSE_COMPACT | XML_PARSE_HUGE | XML_PARSE_BIG_LINES;
if ((options & ~VALID_OPTIONS) != 0) {
@@ -107,7 +107,7 @@ PHP_METHOD(DOM_XMLDocument, createEmpty)
size_t encoding_len = strlen("UTF-8");
const char *encoding = "UTF-8";
size_t version_len;
- if (zend_parse_parameters(ZEND_NUM_ARGS(), "|ss", &version, &version_len, &encoding, &encoding_len) == FAILURE) {
+ if (zend_parse_parameters(ZEND_NUM_ARGS(), "|sp", &version, &version_len, &encoding, &encoding_len) == FAILURE) {
RETURN_THROWS();
}
@@ -120,7 +120,7 @@ PHP_METHOD(DOM_XMLDocument, createEmpty)
RETURN_THROWS();
}
- xmlDocPtr lxml_doc = xmlNewDoc((const xmlChar *) version);
+ xmlDocPtr lxml_doc = xmlNewDoc((const xmlChar *) version);
if (UNEXPECTED(lxml_doc == NULL)) {
goto oom;
}
@@ -161,7 +161,7 @@ static void load_from_helper(INTERNAL_FUNCTION_PARAMETERS, int mode)
RETURN_THROWS();
}
- if (!check_options_validity(options)) {
+ if (!check_options_validity(2, options)) {
RETURN_THROWS();
}
From bf89e8033e63001abfd6b8a51559889584025327 Mon Sep 17 00:00:00 2001
From: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
Date: Sat, 28 Oct 2023 20:45:56 +0200
Subject: [PATCH 38/53] Use canonical names in the return types and argument
types
---
ext/dom/php_dom.stub.php | 18 +++++++++---------
ext/dom/php_dom_arginfo.h | 28 ++++++++++++++++++----------
2 files changed, 27 insertions(+), 19 deletions(-)
diff --git a/ext/dom/php_dom.stub.php b/ext/dom/php_dom.stub.php
index 72c55d3ab35ed..e7099ae204948 100644
--- a/ext/dom/php_dom.stub.php
+++ b/ext/dom/php_dom.stub.php
@@ -1086,13 +1086,13 @@ public function createTextNode(string $data): Text {}
public function getElementById(string $elementId): ?Element {}
/** @tentative-return-type */
- public function getElementsByTagName(string $qualifiedName): \DOMNodeList {}
+ public function getElementsByTagName(string $qualifiedName): NodeList {}
/** @tentative-return-type */
- public function getElementsByTagNameNS(?string $namespace, string $localName): \DOMNodeList {}
+ public function getElementsByTagNameNS(?string $namespace, string $localName): NodeList {}
/** @return Node|false */
- public function importNode(\DOMNode $node, bool $deep = false) {}
+ public function importNode(Node $node, bool $deep = false) {}
/** @tentative-return-type */
public function normalizeDocument(): void {}
@@ -1115,7 +1115,7 @@ public function relaxNGValidateSource(string $source): bool {}
#endif
/** @tentative-return-type */
- public function adoptNode(\DOMNode $node): \DOMNode|false {}
+ public function adoptNode(Node $node): Node|false {}
/**
* @param Node|string $nodes
@@ -1144,12 +1144,12 @@ public static function createFromFile(string $path, int $options = 0, ?string $o
public static function createFromString(string $source, int $options = 0, ?string $override_encoding = null): HTMLDocument {}
/** @implementation-alias DOMDocument::saveXML */
- public function saveXML(?\DOMNode $node = null, int $options = 0): string|false {}
+ public function saveXML(?Node $node = null, int $options = 0): string|false {}
/** @implementation-alias DOMDocument::save */
public function saveXMLFile(string $filename, int $options = 0): int|false {}
- public function saveHTML(?\DOMNode $node = null): string|false {}
+ public function saveHTML(?Node $node = null): string|false {}
public function saveHTMLFile(string $filename): int|false {}
}
@@ -1190,7 +1190,7 @@ public static function createFromString(string $source, int $options = 0, ?strin
/**
* @implementation-alias DOMDocument::createEntityReference
- * @return DOMEntityReference|false
+ * @return EntityReference|false
*/
public function createEntityReference(string $name) {}
@@ -1210,7 +1210,7 @@ public function xinclude(int $options = 0): int|false {}
* @tentative-return-type
* @implementation-alias DOMDocument::saveXML
*/
- public function saveXML(?\DOMNode $node = null, int $options = 0): string|false {}
+ public function saveXML(?Node $node = null, int $options = 0): string|false {}
/**
* @tentative-return-type
@@ -1220,5 +1220,5 @@ public function saveXMLFile(string $filename, int $options = 0): int|false {}
}
/** @implementation-alias dom_import_simplexml */
- function import_simplexml(object $node): DOMElement {}
+ function import_simplexml(object $node): Element {}
}
diff --git a/ext/dom/php_dom_arginfo.h b/ext/dom/php_dom_arginfo.h
index a2b8562bd8f9e..eaa15f864ccbd 100644
--- a/ext/dom/php_dom_arginfo.h
+++ b/ext/dom/php_dom_arginfo.h
@@ -1,11 +1,11 @@
/* This is a generated file, edit the .stub.php file instead.
- * Stub hash: 512273df3d4b8f601ffb0d78d6b4ec289fb68d6d */
+ * Stub hash: 77c532f4d00b3489e09ee9753e2cb6dd42152eed */
ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(arginfo_dom_import_simplexml, 0, 1, DOMElement, 0)
ZEND_ARG_TYPE_INFO(0, node, IS_OBJECT, 0)
ZEND_END_ARG_INFO()
-ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(arginfo_DOM_import_simplexml, 0, 1, DOM\\DOMElement, 0)
+ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(arginfo_DOM_import_simplexml, 0, 1, DOM\\Element, 0)
ZEND_ARG_TYPE_INFO(0, node, IS_OBJECT, 0)
ZEND_END_ARG_INFO()
@@ -493,12 +493,17 @@ ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_OBJ_INFO_EX(arginfo_class_DOM_Document_getE
ZEND_ARG_TYPE_INFO(0, elementId, IS_STRING, 0)
ZEND_END_ARG_INFO()
-#define arginfo_class_DOM_Document_getElementsByTagName arginfo_class_DOMElement_getElementsByTagName
+ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_OBJ_INFO_EX(arginfo_class_DOM_Document_getElementsByTagName, 0, 1, DOM\\NodeList, 0)
+ ZEND_ARG_TYPE_INFO(0, qualifiedName, IS_STRING, 0)
+ZEND_END_ARG_INFO()
-#define arginfo_class_DOM_Document_getElementsByTagNameNS arginfo_class_DOMElement_getElementsByTagNameNS
+ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_OBJ_INFO_EX(arginfo_class_DOM_Document_getElementsByTagNameNS, 0, 2, DOM\\NodeList, 0)
+ ZEND_ARG_TYPE_INFO(0, namespace, IS_STRING, 1)
+ ZEND_ARG_TYPE_INFO(0, localName, IS_STRING, 0)
+ZEND_END_ARG_INFO()
ZEND_BEGIN_ARG_INFO_EX(arginfo_class_DOM_Document_importNode, 0, 0, 1)
- ZEND_ARG_OBJ_INFO(0, node, DOMNode, 0)
+ ZEND_ARG_OBJ_INFO(0, node, DOM\\Node, 0)
ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, deep, _IS_BOOL, 0, "false")
ZEND_END_ARG_INFO()
@@ -535,8 +540,8 @@ ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_TYPE_INFO_EX(arginfo_class_DOM_Document_rel
ZEND_END_ARG_INFO()
#endif
-ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_OBJ_TYPE_MASK_EX(arginfo_class_DOM_Document_adoptNode, 0, 1, DOMNode, MAY_BE_FALSE)
- ZEND_ARG_OBJ_INFO(0, node, DOMNode, 0)
+ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_OBJ_TYPE_MASK_EX(arginfo_class_DOM_Document_adoptNode, 0, 1, DOM\\Node, MAY_BE_FALSE)
+ ZEND_ARG_OBJ_INFO(0, node, DOM\\Node, 0)
ZEND_END_ARG_INFO()
#define arginfo_class_DOM_Document_append arginfo_class_DOMParentNode_append
@@ -564,7 +569,7 @@ ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(arginfo_class_DOM_HTMLDocument_createFrom
ZEND_END_ARG_INFO()
ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_class_DOM_HTMLDocument_saveXML, 0, 0, MAY_BE_STRING|MAY_BE_FALSE)
- ZEND_ARG_OBJ_INFO_WITH_DEFAULT_VALUE(0, node, DOMNode, 1, "null")
+ ZEND_ARG_OBJ_INFO_WITH_DEFAULT_VALUE(0, node, DOM\\Node, 1, "null")
ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, options, IS_LONG, 0, "0")
ZEND_END_ARG_INFO()
@@ -574,7 +579,7 @@ ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_class_DOM_HTMLDocument_saveXMLFi
ZEND_END_ARG_INFO()
ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_class_DOM_HTMLDocument_saveHTML, 0, 0, MAY_BE_STRING|MAY_BE_FALSE)
- ZEND_ARG_OBJ_INFO_WITH_DEFAULT_VALUE(0, node, DOMNode, 1, "null")
+ ZEND_ARG_OBJ_INFO_WITH_DEFAULT_VALUE(0, node, DOM\\Node, 1, "null")
ZEND_END_ARG_INFO()
ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_class_DOM_HTMLDocument_saveHTMLFile, 0, 1, MAY_BE_LONG|MAY_BE_FALSE)
@@ -606,7 +611,10 @@ ZEND_END_ARG_INFO()
#define arginfo_class_DOM_XMLDocument_xinclude arginfo_class_DOMDocument_xinclude
-#define arginfo_class_DOM_XMLDocument_saveXML arginfo_class_DOMDocument_saveXML
+ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_TYPE_MASK_EX(arginfo_class_DOM_XMLDocument_saveXML, 0, 0, MAY_BE_STRING|MAY_BE_FALSE)
+ ZEND_ARG_OBJ_INFO_WITH_DEFAULT_VALUE(0, node, DOM\\Node, 1, "null")
+ ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, options, IS_LONG, 0, "0")
+ZEND_END_ARG_INFO()
#define arginfo_class_DOM_XMLDocument_saveXMLFile arginfo_class_DOMDocument_save
From 7ba177e2db38a38b6780c61a41d5bc4a5148f1ae Mon Sep 17 00:00:00 2001
From: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
Date: Sat, 28 Oct 2023 20:51:43 +0200
Subject: [PATCH 39/53] Consistently use camelCase for overrideEncoding
---
ext/dom/php_dom.stub.php | 8 ++++----
ext/dom/php_dom_arginfo.h | 10 +++++-----
.../HTMLDocument_createFromFile_override_encoding.phpt | 10 +++++-----
...TMLDocument_createFromString_override_encoding.phpt | 10 +++++-----
...ocument_override_encoding_incompatible_charset.phpt | 4 ++--
.../XMLDocument_createFromFile_override_encoding.phpt | 10 +++++-----
...XMLDocument_createFromString_override_encoding.phpt | 10 +++++-----
7 files changed, 31 insertions(+), 31 deletions(-)
diff --git a/ext/dom/php_dom.stub.php b/ext/dom/php_dom.stub.php
index e7099ae204948..34d9e294278c9 100644
--- a/ext/dom/php_dom.stub.php
+++ b/ext/dom/php_dom.stub.php
@@ -1139,9 +1139,9 @@ private function __construct() {}
public static function createEmpty(string $encoding = "UTF-8"): HTMLDocument {}
- public static function createFromFile(string $path, int $options = 0, ?string $override_encoding = null): HTMLDocument {}
+ public static function createFromFile(string $path, int $options = 0, ?string $overrideEncoding = null): HTMLDocument {}
- public static function createFromString(string $source, int $options = 0, ?string $override_encoding = null): HTMLDocument {}
+ public static function createFromString(string $source, int $options = 0, ?string $overrideEncoding = null): HTMLDocument {}
/** @implementation-alias DOMDocument::saveXML */
public function saveXML(?Node $node = null, int $options = 0): string|false {}
@@ -1161,9 +1161,9 @@ private function __construct() {}
public static function createEmpty(string $version = "1.0", string $encoding = "UTF-8"): XMLDocument {}
- public static function createFromFile(string $path, int $options = 0, ?string $override_encoding = null): XMLDocument {}
+ public static function createFromFile(string $path, int $options = 0, ?string $overrideEncoding = null): XMLDocument {}
- public static function createFromString(string $source, int $options = 0, ?string $override_encoding = null): XMLDocument {}
+ public static function createFromString(string $source, int $options = 0, ?string $overrideEncoding = null): XMLDocument {}
/** @readonly */
public ?string $xmlEncoding;
diff --git a/ext/dom/php_dom_arginfo.h b/ext/dom/php_dom_arginfo.h
index eaa15f864ccbd..b3e85b4c5d7aa 100644
--- a/ext/dom/php_dom_arginfo.h
+++ b/ext/dom/php_dom_arginfo.h
@@ -1,5 +1,5 @@
/* This is a generated file, edit the .stub.php file instead.
- * Stub hash: 77c532f4d00b3489e09ee9753e2cb6dd42152eed */
+ * Stub hash: 00c85801438f6e1054b82fd376d287488ec52a2f */
ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(arginfo_dom_import_simplexml, 0, 1, DOMElement, 0)
ZEND_ARG_TYPE_INFO(0, node, IS_OBJECT, 0)
@@ -559,13 +559,13 @@ ZEND_END_ARG_INFO()
ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(arginfo_class_DOM_HTMLDocument_createFromFile, 0, 1, DOM\\HTMLDocument, 0)
ZEND_ARG_TYPE_INFO(0, path, IS_STRING, 0)
ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, options, IS_LONG, 0, "0")
- ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, override_encoding, IS_STRING, 1, "null")
+ ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, overrideEncoding, IS_STRING, 1, "null")
ZEND_END_ARG_INFO()
ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(arginfo_class_DOM_HTMLDocument_createFromString, 0, 1, DOM\\HTMLDocument, 0)
ZEND_ARG_TYPE_INFO(0, source, IS_STRING, 0)
ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, options, IS_LONG, 0, "0")
- ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, override_encoding, IS_STRING, 1, "null")
+ ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, overrideEncoding, IS_STRING, 1, "null")
ZEND_END_ARG_INFO()
ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_class_DOM_HTMLDocument_saveXML, 0, 0, MAY_BE_STRING|MAY_BE_FALSE)
@@ -596,13 +596,13 @@ ZEND_END_ARG_INFO()
ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(arginfo_class_DOM_XMLDocument_createFromFile, 0, 1, DOM\\XMLDocument, 0)
ZEND_ARG_TYPE_INFO(0, path, IS_STRING, 0)
ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, options, IS_LONG, 0, "0")
- ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, override_encoding, IS_STRING, 1, "null")
+ ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, overrideEncoding, IS_STRING, 1, "null")
ZEND_END_ARG_INFO()
ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(arginfo_class_DOM_XMLDocument_createFromString, 0, 1, DOM\\XMLDocument, 0)
ZEND_ARG_TYPE_INFO(0, source, IS_STRING, 0)
ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, options, IS_LONG, 0, "0")
- ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, override_encoding, IS_STRING, 1, "null")
+ ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, overrideEncoding, IS_STRING, 1, "null")
ZEND_END_ARG_INFO()
#define arginfo_class_DOM_XMLDocument_createEntityReference arginfo_class_DOMDocument_createEntityReference
diff --git a/ext/dom/tests/modern/html/encoding/HTMLDocument_createFromFile_override_encoding.phpt b/ext/dom/tests/modern/html/encoding/HTMLDocument_createFromFile_override_encoding.phpt
index 041447cf4bb7c..c0d1bd58fdfd1 100644
--- a/ext/dom/tests/modern/html/encoding/HTMLDocument_createFromFile_override_encoding.phpt
+++ b/ext/dom/tests/modern/html/encoding/HTMLDocument_createFromFile_override_encoding.phpt
@@ -1,27 +1,27 @@
--TEST--
-DOM\HTMLDocument::createFromFile() with override_encoding
+DOM\HTMLDocument::createFromFile() with overrideEncoding
--EXTENSIONS--
dom
--FILE--
getMessage(), "\n";
}
-$dom = DOM\HTMLDocument::createFromFile(__DIR__ . '/gb18030_without_charset.html', override_encoding: 'GB18030');
+$dom = DOM\HTMLDocument::createFromFile(__DIR__ . '/gb18030_without_charset.html', overrideEncoding: 'GB18030');
var_dump($dom->documentElement->lastChild->textContent);
var_dump($dom->encoding);
-$dom = DOM\HTMLDocument::createFromFile(__DIR__ . '/fallback_encoding.html', override_encoding: 'Windows-1252');
+$dom = DOM\HTMLDocument::createFromFile(__DIR__ . '/fallback_encoding.html', overrideEncoding: 'Windows-1252');
var_dump($dom->documentElement->lastChild->textContent);
var_dump($dom->encoding);
?>
--EXPECT--
-DOM\HTMLDocument::createFromFile(): Argument #3 ($override_encoding) must be a valid document encoding
+DOM\HTMLDocument::createFromFile(): Argument #3 ($overrideEncoding) must be a valid document encoding
string(20) "
Héllo, world!
"
diff --git a/ext/dom/tests/modern/html/encoding/HTMLDocument_createFromString_override_encoding.phpt b/ext/dom/tests/modern/html/encoding/HTMLDocument_createFromString_override_encoding.phpt
index c6382a3fae900..8aebc0fb6d696 100644
--- a/ext/dom/tests/modern/html/encoding/HTMLDocument_createFromString_override_encoding.phpt
+++ b/ext/dom/tests/modern/html/encoding/HTMLDocument_createFromString_override_encoding.phpt
@@ -1,27 +1,27 @@
--TEST--
-DOM\HTMLDocument::createFromString() with override_encoding
+DOM\HTMLDocument::createFromString() with overrideEncoding
--EXTENSIONS--
dom
--FILE--
getMessage(), "\n";
}
-$dom = DOM\HTMLDocument::createFromString(file_get_contents(__DIR__ . '/gb18030_without_charset.html'), override_encoding: 'GB18030');
+$dom = DOM\HTMLDocument::createFromString(file_get_contents(__DIR__ . '/gb18030_without_charset.html'), overrideEncoding: 'GB18030');
var_dump($dom->documentElement->lastChild->textContent);
var_dump($dom->encoding);
-$dom = DOM\HTMLDocument::createFromString(file_get_contents(__DIR__ . '/fallback_encoding.html'), override_encoding: 'Windows-1252');
+$dom = DOM\HTMLDocument::createFromString(file_get_contents(__DIR__ . '/fallback_encoding.html'), overrideEncoding: 'Windows-1252');
var_dump($dom->documentElement->lastChild->textContent);
var_dump($dom->encoding);
?>
--EXPECT--
-DOM\HTMLDocument::createFromString(): Argument #3 ($override_encoding) must be a valid document encoding
+DOM\HTMLDocument::createFromString(): Argument #3 ($overrideEncoding) must be a valid document encoding
string(20) "
Héllo, world!
"
diff --git a/ext/dom/tests/modern/html/encoding/HTMLDocument_override_encoding_incompatible_charset.phpt b/ext/dom/tests/modern/html/encoding/HTMLDocument_override_encoding_incompatible_charset.phpt
index 63a1fe0040f95..737f6aca4285a 100644
--- a/ext/dom/tests/modern/html/encoding/HTMLDocument_override_encoding_incompatible_charset.phpt
+++ b/ext/dom/tests/modern/html/encoding/HTMLDocument_override_encoding_incompatible_charset.phpt
@@ -1,5 +1,5 @@
--TEST--
-DOM\HTMLDocument: override_encoding with incompatible charset
+DOM\HTMLDocument: overrideEncoding with incompatible charset
--EXTENSIONS--
iconv
dom
@@ -22,7 +22,7 @@ $doc = DOM\HTMLDocument::createFromString(
DOC,
),
- override_encoding: 'utf-8'
+ overrideEncoding: 'utf-8'
);
var_dump(iconv('UTF-8', 'ISO-8859-1', $doc->getElementsByTagName('title')->item(0)->textContent));
diff --git a/ext/dom/tests/modern/xml/XMLDocument_createFromFile_override_encoding.phpt b/ext/dom/tests/modern/xml/XMLDocument_createFromFile_override_encoding.phpt
index 088471d1c1106..e4519f9d184bf 100644
--- a/ext/dom/tests/modern/xml/XMLDocument_createFromFile_override_encoding.phpt
+++ b/ext/dom/tests/modern/xml/XMLDocument_createFromFile_override_encoding.phpt
@@ -1,27 +1,27 @@
--TEST--
-DOM\XMLDocument::createFromFile() with override_encoding
+DOM\XMLDocument::createFromFile() with overrideEncoding
--EXTENSIONS--
dom
--FILE--
getMessage(), "\n";
}
-$dom = DOM\XMLDocument::createFromFile(__DIR__ . '/dummy.xml', override_encoding: 'UTF-8');
+$dom = DOM\XMLDocument::createFromFile(__DIR__ . '/dummy.xml', overrideEncoding: 'UTF-8');
var_dump($dom->documentElement->lastChild->textContent);
var_dump($dom->encoding);
-$dom = DOM\XMLDocument::createFromFile(__DIR__ . '/dummy.xml', override_encoding: 'Windows-1252');
+$dom = DOM\XMLDocument::createFromFile(__DIR__ . '/dummy.xml', overrideEncoding: 'Windows-1252');
var_dump($dom->documentElement->lastChild->textContent);
var_dump($dom->encoding);
?>
--EXPECT--
-DOM\XMLDocument::createFromFile(): Argument #3 ($override_encoding) must be a valid document encoding
+DOM\XMLDocument::createFromFile(): Argument #3 ($overrideEncoding) must be a valid document encoding
string(2) "é"
NULL
string(4) "é"
diff --git a/ext/dom/tests/modern/xml/XMLDocument_createFromString_override_encoding.phpt b/ext/dom/tests/modern/xml/XMLDocument_createFromString_override_encoding.phpt
index 4247e3267b7f2..cee18f52354ad 100644
--- a/ext/dom/tests/modern/xml/XMLDocument_createFromString_override_encoding.phpt
+++ b/ext/dom/tests/modern/xml/XMLDocument_createFromString_override_encoding.phpt
@@ -1,27 +1,27 @@
--TEST--
-DOM\XMLDocument::createFromString() with override_encoding
+DOM\XMLDocument::createFromString() with overrideEncoding
--EXTENSIONS--
dom
--FILE--
getMessage(), "\n";
}
-$dom = DOM\XMLDocument::createFromString(file_get_contents(__DIR__ . '/dummy.xml'), override_encoding: 'UTF-8');
+$dom = DOM\XMLDocument::createFromString(file_get_contents(__DIR__ . '/dummy.xml'), overrideEncoding: 'UTF-8');
var_dump($dom->documentElement->lastChild->textContent);
var_dump($dom->encoding);
-$dom = DOM\XMLDocument::createFromString(file_get_contents(__DIR__ . '/dummy.xml'), override_encoding: 'Windows-1252');
+$dom = DOM\XMLDocument::createFromString(file_get_contents(__DIR__ . '/dummy.xml'), overrideEncoding: 'Windows-1252');
var_dump($dom->documentElement->lastChild->textContent);
var_dump($dom->encoding);
?>
--EXPECT--
-DOM\XMLDocument::createFromString(): Argument #3 ($override_encoding) must be a valid document encoding
+DOM\XMLDocument::createFromString(): Argument #3 ($overrideEncoding) must be a valid document encoding
string(2) "é"
NULL
string(4) "é"
From 3dbbf19b65dd6714cbf7c744f8d88bcad78c2371 Mon Sep 17 00:00:00 2001
From: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
Date: Tue, 31 Oct 2023 20:11:24 +0100
Subject: [PATCH 40/53] Split long lines in html_document
---
ext/dom/html_document.c | 68 +++++++++++++++++++++++++++++++++++++----
1 file changed, 62 insertions(+), 6 deletions(-)
diff --git a/ext/dom/html_document.c b/ext/dom/html_document.c
index 536383380d6b8..d23b89c20e2f3 100644
--- a/ext/dom/html_document.c
+++ b/ext/dom/html_document.c
@@ -435,16 +435,44 @@ static bool dom_decode_encode_fast_path(lexbor_libxml2_bridge_parse_context *ctx
lxb_codepoint_t codepoint = decoding_encoding_ctx->decode_data->decode_single(&decoding_encoding_ctx->decode, &buf_ref, buf_end);
if (UNEXPECTED(codepoint > LXB_ENCODING_MAX_CODEPOINT)) {
size_t skip = buf_ref - buf_ref_backup; /* Skip invalid data, it's replaced by the UTF-8 replacement bytes */
- if (!dom_process_parse_chunk(ctx, document, parser, buf_ref - last_output - skip, last_output, buf_ref - last_output, tokenizer_error_offset, tree_error_offset)) {
+ if (!dom_process_parse_chunk(
+ ctx,
+ document,
+ parser,
+ buf_ref - last_output - skip,
+ last_output,
+ buf_ref - last_output,
+ tokenizer_error_offset,
+ tree_error_offset
+ )) {
goto fail_oom;
}
- if (!dom_process_parse_chunk(ctx, document, parser, LXB_ENCODING_REPLACEMENT_SIZE, LXB_ENCODING_REPLACEMENT_BYTES, 0, tokenizer_error_offset, tree_error_offset)) {
+ if (!dom_process_parse_chunk(
+ ctx,
+ document,
+ parser,
+ LXB_ENCODING_REPLACEMENT_SIZE,
+ LXB_ENCODING_REPLACEMENT_BYTES,
+ 0,
+ tokenizer_error_offset,
+ tree_error_offset
+ )) {
goto fail_oom;
}
last_output = buf_ref;
}
}
- if (buf_ref != last_output && !dom_process_parse_chunk(ctx, document, parser, buf_ref - last_output, last_output, buf_ref - last_output, tokenizer_error_offset, tree_error_offset)) {
+ if (buf_ref != last_output
+ && !dom_process_parse_chunk(
+ ctx,
+ document,
+ parser,
+ buf_ref - last_output,
+ last_output,
+ buf_ref - last_output,
+ tokenizer_error_offset,
+ tree_error_offset
+ )) {
goto fail_oom;
}
*buf_ref_ref = buf_ref;
@@ -467,7 +495,16 @@ static bool dom_decode_encode_slow_path(lexbor_libxml2_bridge_parse_context *ctx
do {
encode_status = decoding_encoding_ctx->encode_data->encode(&decoding_encoding_ctx->encode, &codepoints_ref, codepoints_end);
ZEND_ASSERT(encode_status != LXB_STATUS_ERROR && "parameters and replacements should be valid");
- if (!dom_process_parse_chunk(ctx, document, parser, lxb_encoding_encode_buf_used(&decoding_encoding_ctx->encode), decoding_encoding_ctx->encoding_output, decoding_buffer_used, tokenizer_error_offset, tree_error_offset)) {
+ if (!dom_process_parse_chunk(
+ ctx,
+ document,
+ parser,
+ lxb_encoding_encode_buf_used(&decoding_encoding_ctx->encode),
+ decoding_encoding_ctx->encoding_output,
+ decoding_buffer_used,
+ tokenizer_error_offset,
+ tree_error_offset
+ )) {
goto fail_oom;
}
lxb_encoding_encode_buf_used_set(&decoding_encoding_ctx->encode, 0);
@@ -500,13 +537,32 @@ static bool dom_parse_decode_encode_finish(lexbor_libxml2_bridge_parse_context *
const lxb_codepoint_t *codepoints_ref = (const lxb_codepoint_t *) decoding_encoding_ctx->codepoints;
const lxb_codepoint_t *codepoints_end = codepoints_ref + decoding_buffer_size;
(void) decoding_encoding_ctx->encode_data->encode(&decoding_encoding_ctx->encode, &codepoints_ref, codepoints_end);
- if (!dom_process_parse_chunk(ctx, document, parser, lxb_encoding_encode_buf_used(&decoding_encoding_ctx->encode), decoding_encoding_ctx->encoding_output, decoding_buffer_size, tokenizer_error_offset, tree_error_offset)) {
+ if (!dom_process_parse_chunk(
+ ctx,
+ document,
+ parser,
+ lxb_encoding_encode_buf_used(&decoding_encoding_ctx->encode),
+ decoding_encoding_ctx->encoding_output,
+ decoding_buffer_size,
+ tokenizer_error_offset,
+ tree_error_offset
+ )) {
return false;
}
}
}
(void) lxb_encoding_encode_finish(&decoding_encoding_ctx->encode);
- if (lxb_encoding_encode_buf_used(&decoding_encoding_ctx->encode) && !dom_process_parse_chunk(ctx, document, parser, lxb_encoding_encode_buf_used(&decoding_encoding_ctx->encode), decoding_encoding_ctx->encoding_output, lxb_encoding_decode_buf_used(&decoding_encoding_ctx->decode), tokenizer_error_offset, tree_error_offset)) {
+ if (lxb_encoding_encode_buf_used(&decoding_encoding_ctx->encode)
+ && !dom_process_parse_chunk(
+ ctx,
+ document,
+ parser,
+ lxb_encoding_encode_buf_used(&decoding_encoding_ctx->encode),
+ decoding_encoding_ctx->encoding_output,
+ lxb_encoding_decode_buf_used(&decoding_encoding_ctx->decode),
+ tokenizer_error_offset,
+ tree_error_offset
+ )) {
return false;
}
return true;
From f798f451d0cd63b3f29c9f791793f869ab6178cf Mon Sep 17 00:00:00 2001
From: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
Date: Tue, 31 Oct 2023 20:11:40 +0100
Subject: [PATCH 41/53] Give magic values descriptive names
---
ext/dom/html5_parser.c | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/ext/dom/html5_parser.c b/ext/dom/html5_parser.c
index 095adb8d2dbcd..a30fbf0c46645 100644
--- a/ext/dom/html5_parser.c
+++ b/ext/dom/html5_parser.c
@@ -29,6 +29,10 @@
#include
#include
+#define WORK_LIST_INIT_SIZE 128
+/* libxml2 reserves 2 pointer-sized words for interned strings */
+#define LXML_INTERNED_STRINGS_SIZE (sizeof(void *) * 2)
+
typedef struct {
lxb_dom_node_t *node;
uintptr_t current_active_namespace;
@@ -69,7 +73,7 @@ static lexbor_libxml2_bridge_status lexbor_libxml2_bridge_convert(lxb_dom_node_t
lexbor_libxml2_bridge_status retval = LEXBOR_LIBXML2_BRIDGE_STATUS_OK;
lexbor_array_obj_t work_list;
- lexbor_array_obj_init(&work_list, 128, sizeof(work_list_item));
+ lexbor_array_obj_init(&work_list, WORK_LIST_INIT_SIZE, sizeof(work_list_item));
for (lxb_dom_node_t *node = start_node; node != NULL; node = node->prev) {
lexbor_libxml2_bridge_work_list_item_push(&work_list, node, LXB_NS__UNDEF, (xmlNodePtr) lxml_doc, NULL);
@@ -120,7 +124,7 @@ static lexbor_libxml2_bridge_status lexbor_libxml2_bridge_convert(lxb_dom_node_t
goto out;
}
xmlNodePtr lxml_text;
- if (compact_text_nodes && data_length < sizeof(void *) * 2) {
+ if (compact_text_nodes && data_length < LXML_INTERNED_STRINGS_SIZE) {
/* See xmlSAX2TextNode() in libxml2 */
lxml_text = xmlMalloc(sizeof(xmlNode));
if (UNEXPECTED(lxml_text == NULL)) {
From cf07ba98a602ee760a17d33f54c15d0d574f9f50 Mon Sep 17 00:00:00 2001
From: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
Date: Tue, 31 Oct 2023 20:11:50 +0100
Subject: [PATCH 42/53] Codestyle nit
---
ext/dom/html5_serializer.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/ext/dom/html5_serializer.c b/ext/dom/html5_serializer.c
index af6f5551effca..ed5f94089d90f 100644
--- a/ext/dom/html5_serializer.c
+++ b/ext/dom/html5_serializer.c
@@ -167,7 +167,7 @@ static zend_result dom_html5_serialize_element_start(dom_html5_serialize_context
/* We don't support the "is" value during element creation, so no handling here. */
/* Some namespace declarations are also attributes (see https://html.spec.whatwg.org/multipage/parsing.html#create-an-element-for-the-token) */
- for (const xmlNs *ns = node->nsDef; ns; ns = ns->next) {
+ for (const xmlNs *ns = node->nsDef; ns != NULL; ns = ns->next) {
if (!dom_ns_is_also_an_attribute(ns)) {
continue;
}
From ea7d604ce99c21055c01b6d5b742adf24d3fcb40 Mon Sep 17 00:00:00 2001
From: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
Date: Tue, 31 Oct 2023 20:12:46 +0100
Subject: [PATCH 43/53] Reduce repetition
---
ext/dom/html_document.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/ext/dom/html_document.c b/ext/dom/html_document.c
index d23b89c20e2f3..5e9dd6a6be43e 100644
--- a/ext/dom/html_document.c
+++ b/ext/dom/html_document.c
@@ -674,8 +674,9 @@ PHP_METHOD(DOM_HTMLDocument, createFromString)
while (source_len > 0) {
size_t chunk_size = source_len;
- if (chunk_size > sizeof(decoding_encoding_ctx.encoding_output) / sizeof(lxb_char_t)) {
- chunk_size = sizeof(decoding_encoding_ctx.encoding_output) / sizeof(lxb_char_t);
+ const size_t MAX_CHUNK_SIZE = sizeof(decoding_encoding_ctx.encoding_output) / sizeof(*decoding_encoding_ctx.encoding_output);
+ if (chunk_size > MAX_CHUNK_SIZE) {
+ chunk_size = MAX_CHUNK_SIZE;
}
source_len -= chunk_size;
From d906c0dc9f81c7c8e5bf28c08fd046125f4954e7 Mon Sep 17 00:00:00 2001
From: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
Date: Tue, 31 Oct 2023 20:14:08 +0100
Subject: [PATCH 44/53] Apply strict properties
---
ext/dom/php_dom.stub.php | 2 ++
ext/dom/php_dom_arginfo.h | 22 +++++++++++-----------
2 files changed, 13 insertions(+), 11 deletions(-)
diff --git a/ext/dom/php_dom.stub.php b/ext/dom/php_dom.stub.php
index 34d9e294278c9..5547a56df9656 100644
--- a/ext/dom/php_dom.stub.php
+++ b/ext/dom/php_dom.stub.php
@@ -1133,6 +1133,7 @@ public function prepend(...$nodes): void {}
public function replaceChildren(...$nodes): void {}
}
+ /** @strict-properties */
final class HTMLDocument extends DOM\Document
{
private function __construct() {}
@@ -1154,6 +1155,7 @@ public function saveHTML(?Node $node = null): string|false {}
public function saveHTMLFile(string $filename): int|false {}
}
+ /** @strict-properties */
final class XMLDocument extends DOM\Document
{
/** @implementation-alias DOM\HTMLDocument::__construct */
diff --git a/ext/dom/php_dom_arginfo.h b/ext/dom/php_dom_arginfo.h
index b3e85b4c5d7aa..f3144871d928b 100644
--- a/ext/dom/php_dom_arginfo.h
+++ b/ext/dom/php_dom_arginfo.h
@@ -1,5 +1,5 @@
/* This is a generated file, edit the .stub.php file instead.
- * Stub hash: 00c85801438f6e1054b82fd376d287488ec52a2f */
+ * Stub hash: c550911c409dac315afdbfc5858b0d8b0cba2bf3 */
ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(arginfo_dom_import_simplexml, 0, 1, DOMElement, 0)
ZEND_ARG_TYPE_INFO(0, node, IS_OBJECT, 0)
@@ -493,17 +493,17 @@ ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_OBJ_INFO_EX(arginfo_class_DOM_Document_getE
ZEND_ARG_TYPE_INFO(0, elementId, IS_STRING, 0)
ZEND_END_ARG_INFO()
-ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_OBJ_INFO_EX(arginfo_class_DOM_Document_getElementsByTagName, 0, 1, DOM\\NodeList, 0)
+ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_OBJ_INFO_EX(arginfo_class_DOM_Document_getElementsByTagName, 0, 1, DOM\\\116odeList, 0)
ZEND_ARG_TYPE_INFO(0, qualifiedName, IS_STRING, 0)
ZEND_END_ARG_INFO()
-ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_OBJ_INFO_EX(arginfo_class_DOM_Document_getElementsByTagNameNS, 0, 2, DOM\\NodeList, 0)
+ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_OBJ_INFO_EX(arginfo_class_DOM_Document_getElementsByTagNameNS, 0, 2, DOM\\\116odeList, 0)
ZEND_ARG_TYPE_INFO(0, namespace, IS_STRING, 1)
ZEND_ARG_TYPE_INFO(0, localName, IS_STRING, 0)
ZEND_END_ARG_INFO()
ZEND_BEGIN_ARG_INFO_EX(arginfo_class_DOM_Document_importNode, 0, 0, 1)
- ZEND_ARG_OBJ_INFO(0, node, DOM\\Node, 0)
+ ZEND_ARG_OBJ_INFO(0, node, DOM\\\116ode, 0)
ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, deep, _IS_BOOL, 0, "false")
ZEND_END_ARG_INFO()
@@ -540,8 +540,8 @@ ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_TYPE_INFO_EX(arginfo_class_DOM_Document_rel
ZEND_END_ARG_INFO()
#endif
-ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_OBJ_TYPE_MASK_EX(arginfo_class_DOM_Document_adoptNode, 0, 1, DOM\\Node, MAY_BE_FALSE)
- ZEND_ARG_OBJ_INFO(0, node, DOM\\Node, 0)
+ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_OBJ_TYPE_MASK_EX(arginfo_class_DOM_Document_adoptNode, 0, 1, DOM\\\116ode, MAY_BE_FALSE)
+ ZEND_ARG_OBJ_INFO(0, node, DOM\\\116ode, 0)
ZEND_END_ARG_INFO()
#define arginfo_class_DOM_Document_append arginfo_class_DOMParentNode_append
@@ -569,7 +569,7 @@ ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(arginfo_class_DOM_HTMLDocument_createFrom
ZEND_END_ARG_INFO()
ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_class_DOM_HTMLDocument_saveXML, 0, 0, MAY_BE_STRING|MAY_BE_FALSE)
- ZEND_ARG_OBJ_INFO_WITH_DEFAULT_VALUE(0, node, DOM\\Node, 1, "null")
+ ZEND_ARG_OBJ_INFO_WITH_DEFAULT_VALUE(0, node, DOM\\\116ode, 1, "null")
ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, options, IS_LONG, 0, "0")
ZEND_END_ARG_INFO()
@@ -579,7 +579,7 @@ ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_class_DOM_HTMLDocument_saveXMLFi
ZEND_END_ARG_INFO()
ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_class_DOM_HTMLDocument_saveHTML, 0, 0, MAY_BE_STRING|MAY_BE_FALSE)
- ZEND_ARG_OBJ_INFO_WITH_DEFAULT_VALUE(0, node, DOM\\Node, 1, "null")
+ ZEND_ARG_OBJ_INFO_WITH_DEFAULT_VALUE(0, node, DOM\\\116ode, 1, "null")
ZEND_END_ARG_INFO()
ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_class_DOM_HTMLDocument_saveHTMLFile, 0, 1, MAY_BE_LONG|MAY_BE_FALSE)
@@ -612,7 +612,7 @@ ZEND_END_ARG_INFO()
#define arginfo_class_DOM_XMLDocument_xinclude arginfo_class_DOMDocument_xinclude
ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_TYPE_MASK_EX(arginfo_class_DOM_XMLDocument_saveXML, 0, 0, MAY_BE_STRING|MAY_BE_FALSE)
- ZEND_ARG_OBJ_INFO_WITH_DEFAULT_VALUE(0, node, DOM\\Node, 1, "null")
+ ZEND_ARG_OBJ_INFO_WITH_DEFAULT_VALUE(0, node, DOM\\\116ode, 1, "null")
ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, options, IS_LONG, 0, "0")
ZEND_END_ARG_INFO()
@@ -2040,7 +2040,7 @@ static zend_class_entry *register_class_DOM_HTMLDocument(zend_class_entry *class
INIT_NS_CLASS_ENTRY(ce, "DOM", "HTMLDocument", class_DOM_HTMLDocument_methods);
class_entry = zend_register_internal_class_ex(&ce, class_entry_DOM_DOM_Document);
- class_entry->ce_flags |= ZEND_ACC_FINAL;
+ class_entry->ce_flags |= ZEND_ACC_FINAL|ZEND_ACC_NO_DYNAMIC_PROPERTIES;
return class_entry;
}
@@ -2051,7 +2051,7 @@ static zend_class_entry *register_class_DOM_XMLDocument(zend_class_entry *class_
INIT_NS_CLASS_ENTRY(ce, "DOM", "XMLDocument", class_DOM_XMLDocument_methods);
class_entry = zend_register_internal_class_ex(&ce, class_entry_DOM_DOM_Document);
- class_entry->ce_flags |= ZEND_ACC_FINAL;
+ class_entry->ce_flags |= ZEND_ACC_FINAL|ZEND_ACC_NO_DYNAMIC_PROPERTIES;
zval property_xmlEncoding_default_value;
ZVAL_UNDEF(&property_xmlEncoding_default_value);
From 92e906483d7cf452f5e693ac43492cb0e11648ab Mon Sep 17 00:00:00 2001
From: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
Date: Wed, 1 Nov 2023 20:05:18 +0100
Subject: [PATCH 45/53] Code style nits
---
ext/dom/html5_parser.c | 4 ++--
ext/dom/html_document.c | 8 ++++----
2 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/ext/dom/html5_parser.c b/ext/dom/html5_parser.c
index a30fbf0c46645..918d974c99489 100644
--- a/ext/dom/html5_parser.c
+++ b/ext/dom/html5_parser.c
@@ -126,12 +126,12 @@ static lexbor_libxml2_bridge_status lexbor_libxml2_bridge_convert(lxb_dom_node_t
xmlNodePtr lxml_text;
if (compact_text_nodes && data_length < LXML_INTERNED_STRINGS_SIZE) {
/* See xmlSAX2TextNode() in libxml2 */
- lxml_text = xmlMalloc(sizeof(xmlNode));
+ lxml_text = xmlMalloc(sizeof(*lxml_text));
if (UNEXPECTED(lxml_text == NULL)) {
retval = LEXBOR_LIBXML2_BRIDGE_STATUS_OOM;
goto out;
}
- memset(lxml_text, 0, sizeof(xmlNode));
+ memset(lxml_text, 0, sizeof(*lxml_text));
lxml_text->name = xmlStringText;
lxml_text->type = XML_TEXT_NODE;
lxml_text->doc = lxml_doc;
diff --git a/ext/dom/html_document.c b/ext/dom/html_document.c
index 5e9dd6a6be43e..6f05b342cbbfe 100644
--- a/ext/dom/html_document.c
+++ b/ext/dom/html_document.c
@@ -83,7 +83,7 @@ static void dom_decoding_encoding_ctx_init(dom_decoding_encoding_ctx *ctx)
ctx->decode_data = NULL;
/* Set fast path on by default so that the decoder finishing is skipped if this was never initialised properly. */
ctx->fast_path = true;
- (void) lxb_encoding_encode_init(&ctx->encode, ctx->encode_data, ctx->encoding_output, sizeof(ctx->encoding_output) / sizeof(lxb_char_t));
+ (void) lxb_encoding_encode_init(&ctx->encode, ctx->encode_data, ctx->encoding_output, sizeof(ctx->encoding_output) / sizeof(*ctx->encoding_output));
(void) lxb_encoding_encode_replace_set(&ctx->encode, LXB_ENCODING_REPLACEMENT_BYTES, LXB_ENCODING_REPLACEMENT_SIZE);
}
@@ -389,7 +389,7 @@ static void dom_setup_parser_encoding_manually(const lxb_char_t *buf_start, cons
decoding_encoding_ctx->decode_data = encoding_data;
- (void) lxb_encoding_decode_init(&decoding_encoding_ctx->decode, decoding_encoding_ctx->decode_data, decoding_encoding_ctx->codepoints, sizeof(decoding_encoding_ctx->codepoints) / sizeof(lxb_codepoint_t));
+ (void) lxb_encoding_decode_init(&decoding_encoding_ctx->decode, decoding_encoding_ctx->decode_data, decoding_encoding_ctx->codepoints, sizeof(decoding_encoding_ctx->codepoints) / sizeof(*decoding_encoding_ctx->codepoints));
(void) lxb_encoding_decode_replace_set(&decoding_encoding_ctx->decode, &replacement_codepoint, LXB_ENCODING_REPLACEMENT_BUFFER_LEN);
decoding_encoding_ctx->fast_path = decoding_encoding_ctx->decode_data == decoding_encoding_ctx->encode_data; /* Note: encode_data is for UTF-8 */
@@ -947,8 +947,8 @@ static zend_result dom_common_save(dom_output_ctx *output_ctx, const xmlDoc *doc
lxb_encoding_decode_t decode;
lxb_char_t encoding_output[4096];
lxb_codepoint_t codepoints[4096];
- (void) lxb_encoding_encode_init(&encode, encoding_data, encoding_output, sizeof(encoding_output) / sizeof(lxb_char_t));
- (void) lxb_encoding_decode_init(&decode, decoding_data, codepoints, sizeof(codepoints) / sizeof(lxb_codepoint_t));
+ (void) lxb_encoding_encode_init(&encode, encoding_data, encoding_output, sizeof(encoding_output) / sizeof(*encoding_output));
+ (void) lxb_encoding_decode_init(&decode, decoding_data, codepoints, sizeof(codepoints) / sizeof(*codepoints));
if (encoding_data->encoding == LXB_ENCODING_UTF_8) {
lxb_encoding_encode_replace_set(&encode, LXB_ENCODING_REPLACEMENT_BYTES, LXB_ENCODING_REPLACEMENT_SIZE);
} else {
From 92faa43332dd29573f319429af6d83a4b32abf8a Mon Sep 17 00:00:00 2001
From: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
Date: Sat, 11 Nov 2023 17:16:08 +0100
Subject: [PATCH 46/53] Review remarks
---
...ument_createFromFile_override_encoding.phpt | 2 ++
...ent_createFromString_override_encoding.phpt | 2 ++
.../HTMLDocument_serialize_doctype.phpt | 2 +-
...ument_createFromFile_override_encoding.phpt | 2 ++
...ent_createFromString_override_encoding.phpt | 2 ++
.../modern/xml/XMLDocument_fromString_03.phpt | 18 ++++++++++++++++--
6 files changed, 25 insertions(+), 3 deletions(-)
diff --git a/ext/dom/tests/modern/html/encoding/HTMLDocument_createFromFile_override_encoding.phpt b/ext/dom/tests/modern/html/encoding/HTMLDocument_createFromFile_override_encoding.phpt
index c0d1bd58fdfd1..4f6f9943d2b62 100644
--- a/ext/dom/tests/modern/html/encoding/HTMLDocument_createFromFile_override_encoding.phpt
+++ b/ext/dom/tests/modern/html/encoding/HTMLDocument_createFromFile_override_encoding.phpt
@@ -11,10 +11,12 @@ try {
echo $e->getMessage(), "\n";
}
+// The override encoding matches with the document encoding attribute
$dom = DOM\HTMLDocument::createFromFile(__DIR__ . '/gb18030_without_charset.html', overrideEncoding: 'GB18030');
var_dump($dom->documentElement->lastChild->textContent);
var_dump($dom->encoding);
+// The override encoding mismatches with the document encoding attribute
$dom = DOM\HTMLDocument::createFromFile(__DIR__ . '/fallback_encoding.html', overrideEncoding: 'Windows-1252');
var_dump($dom->documentElement->lastChild->textContent);
var_dump($dom->encoding);
diff --git a/ext/dom/tests/modern/html/encoding/HTMLDocument_createFromString_override_encoding.phpt b/ext/dom/tests/modern/html/encoding/HTMLDocument_createFromString_override_encoding.phpt
index 8aebc0fb6d696..b276be453eec0 100644
--- a/ext/dom/tests/modern/html/encoding/HTMLDocument_createFromString_override_encoding.phpt
+++ b/ext/dom/tests/modern/html/encoding/HTMLDocument_createFromString_override_encoding.phpt
@@ -11,10 +11,12 @@ try {
echo $e->getMessage(), "\n";
}
+// The override encoding matches with the document encoding attribute
$dom = DOM\HTMLDocument::createFromString(file_get_contents(__DIR__ . '/gb18030_without_charset.html'), overrideEncoding: 'GB18030');
var_dump($dom->documentElement->lastChild->textContent);
var_dump($dom->encoding);
+// The override encoding mismatches with the document encoding attribute
$dom = DOM\HTMLDocument::createFromString(file_get_contents(__DIR__ . '/fallback_encoding.html'), overrideEncoding: 'Windows-1252');
var_dump($dom->documentElement->lastChild->textContent);
var_dump($dom->encoding);
diff --git a/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_doctype.phpt b/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_doctype.phpt
index 97e2547a3d60f..72823bb5b4bba 100644
--- a/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_doctype.phpt
+++ b/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_doctype.phpt
@@ -18,7 +18,7 @@ HTML, LIBXML_NOERROR);
echo "--- XML encoding ---\n";
echo $dom->saveXML();
echo "--- HTML encoding ---\n";
-// We don't expec to see the public ID and the system ID because the serialization algorithm doesn't serialize those
+// We don't expect to see the public ID and the system ID because the serialization algorithm doesn't serialize those
echo $dom->saveHTML();
?>
diff --git a/ext/dom/tests/modern/xml/XMLDocument_createFromFile_override_encoding.phpt b/ext/dom/tests/modern/xml/XMLDocument_createFromFile_override_encoding.phpt
index e4519f9d184bf..6090d9b34b024 100644
--- a/ext/dom/tests/modern/xml/XMLDocument_createFromFile_override_encoding.phpt
+++ b/ext/dom/tests/modern/xml/XMLDocument_createFromFile_override_encoding.phpt
@@ -11,10 +11,12 @@ try {
echo $e->getMessage(), "\n";
}
+// The override encoding matches with the document encoding attribute
$dom = DOM\XMLDocument::createFromFile(__DIR__ . '/dummy.xml', overrideEncoding: 'UTF-8');
var_dump($dom->documentElement->lastChild->textContent);
var_dump($dom->encoding);
+// The override encoding mismatches with the document encoding attribute
$dom = DOM\XMLDocument::createFromFile(__DIR__ . '/dummy.xml', overrideEncoding: 'Windows-1252');
var_dump($dom->documentElement->lastChild->textContent);
var_dump($dom->encoding);
diff --git a/ext/dom/tests/modern/xml/XMLDocument_createFromString_override_encoding.phpt b/ext/dom/tests/modern/xml/XMLDocument_createFromString_override_encoding.phpt
index cee18f52354ad..bcbca9a4fdf48 100644
--- a/ext/dom/tests/modern/xml/XMLDocument_createFromString_override_encoding.phpt
+++ b/ext/dom/tests/modern/xml/XMLDocument_createFromString_override_encoding.phpt
@@ -11,10 +11,12 @@ try {
echo $e->getMessage(), "\n";
}
+// The override encoding matches with the document encoding attribute
$dom = DOM\XMLDocument::createFromString(file_get_contents(__DIR__ . '/dummy.xml'), overrideEncoding: 'UTF-8');
var_dump($dom->documentElement->lastChild->textContent);
var_dump($dom->encoding);
+// The override encoding mismatches with the document encoding attribute
$dom = DOM\XMLDocument::createFromString(file_get_contents(__DIR__ . '/dummy.xml'), overrideEncoding: 'Windows-1252');
var_dump($dom->documentElement->lastChild->textContent);
var_dump($dom->encoding);
diff --git a/ext/dom/tests/modern/xml/XMLDocument_fromString_03.phpt b/ext/dom/tests/modern/xml/XMLDocument_fromString_03.phpt
index e7305d1aacb78..359f7086efcea 100644
--- a/ext/dom/tests/modern/xml/XMLDocument_fromString_03.phpt
+++ b/ext/dom/tests/modern/xml/XMLDocument_fromString_03.phpt
@@ -16,9 +16,23 @@ try {
}
foreach ($flags as $flag) {
- DOM\XMLDocument::createFromString(' ', $flag);
+ var_dump(DOM\XMLDocument::createFromString(' ', $flag) instanceof DOM\XMLDocument);
}
?>
--EXPECT--
-DOM\XMLDocument::createFromString(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOENT, LIBXML_DTDLOAD, LIBXML_DTDATTR, LIBXML_DTDVALID, LIBXML_NOERROR, LIBXML_NOWARNING, LIBXML_NOBLANKS, LIBXML_XINCLUDE, LIBXML_NSCLEAN, LIBXML_NOCDATA, LIBXML_NONET, LIBXML_PEDANTIC, LIBXML_COMPACT, LIBXML_PARSEHUGE, LIBXML_BIGLINES)
+DOM\XMLDocument::createFromString(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOENT, LIBXML_DTDLOAD, LIBXML_DTDATTR, LIBXML_DTDVALID, LIBXML_NOERROR, LIBXML_NOWARNING, LIBXML_NOBLANKS, LIBXML_XINCLUDE, LIBXML_NSCLEAN, LIBXML_NOCDATA, LIBXML_NONET, LIBXML_PEDANTIC, LIBXML_COMPACT, LIBXML_PARSEHUGE, LIBXML_BIGLINES)bool(true)
+bool(true)
+bool(true)
+bool(true)
+bool(true)
+bool(true)
+bool(true)
+bool(true)
+bool(true)
+bool(true)
+bool(true)
+bool(true)
+bool(true)
+bool(true)
+bool(true)
From c645f5d999f5cfd83098a063f5b8bc2269479304 Mon Sep 17 00:00:00 2001
From: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
Date: Sat, 11 Nov 2023 17:44:46 +0100
Subject: [PATCH 47/53] Code style linebreaks
---
ext/dom/html5_parser.c | 84 ++++++++--
ext/dom/html5_parser.h | 45 +++++-
ext/dom/html5_serializer.c | 5 +-
ext/dom/html_document.c | 311 ++++++++++++++++++++++++++++++++-----
ext/dom/xml_document.c | 57 ++++++-
5 files changed, 433 insertions(+), 69 deletions(-)
diff --git a/ext/dom/html5_parser.c b/ext/dom/html5_parser.c
index 918d974c99489..dbe83eb340eeb 100644
--- a/ext/dom/html5_parser.c
+++ b/ext/dom/html5_parser.c
@@ -40,7 +40,13 @@ typedef struct {
xmlNsPtr lxml_ns;
} work_list_item;
-static void lexbor_libxml2_bridge_work_list_item_push(lexbor_array_obj_t *array, lxb_dom_node_t *node, uintptr_t current_active_namespace, xmlNodePtr lxml_parent, xmlNsPtr lxml_ns)
+static void lexbor_libxml2_bridge_work_list_item_push(
+ lexbor_array_obj_t *array,
+ lxb_dom_node_t *node,
+ uintptr_t current_active_namespace,
+ xmlNodePtr lxml_parent,
+ xmlNsPtr lxml_ns
+)
{
work_list_item *item = (work_list_item *) lexbor_array_obj_push_wo_cls(array);
item->node = node;
@@ -68,7 +74,12 @@ static const xmlChar *get_libxml_namespace_href(uintptr_t lexbor_namespace)
}
}
-static lexbor_libxml2_bridge_status lexbor_libxml2_bridge_convert(lxb_dom_node_t *start_node, xmlDocPtr lxml_doc, bool compact_text_nodes, bool create_default_ns)
+static lexbor_libxml2_bridge_status lexbor_libxml2_bridge_convert(
+ lxb_dom_node_t *start_node,
+ xmlDocPtr lxml_doc,
+ bool compact_text_nodes,
+ bool create_default_ns
+)
{
lexbor_libxml2_bridge_status retval = LEXBOR_LIBXML2_BRIDGE_STATUS_OK;
@@ -106,14 +117,27 @@ static lexbor_libxml2_bridge_status lexbor_libxml2_bridge_convert(lxb_dom_node_t
if (create_default_ns && UNEXPECTED(entering_namespace != current_stack_item->current_active_namespace)) {
current_lxml_ns = xmlNewNs(lxml_element, get_libxml_namespace_href(entering_namespace), NULL);
}
- lxml_element->ns = current_lxml_ns; /* Instead of xmlSetNs() because we know the arguments are valid. Prevents overhead. */
+ /* Instead of xmlSetNs() because we know the arguments are valid. Prevents overhead. */
+ lxml_element->ns = current_lxml_ns;
for (lxb_dom_node_t *child_node = element->node.last_child; child_node != NULL; child_node = child_node->prev) {
- lexbor_libxml2_bridge_work_list_item_push(&work_list, child_node, entering_namespace, lxml_element, current_lxml_ns);
+ lexbor_libxml2_bridge_work_list_item_push(
+ &work_list,
+ child_node,
+ entering_namespace,
+ lxml_element,
+ current_lxml_ns
+ );
}
for (lxb_dom_attr_t *attr = element->last_attr; attr != NULL; attr = attr->prev) {
- lexbor_libxml2_bridge_work_list_item_push(&work_list, (lxb_dom_node_t *) attr, entering_namespace, lxml_element, current_lxml_ns);
+ lexbor_libxml2_bridge_work_list_item_push(
+ &work_list,
+ (lxb_dom_node_t *) attr,
+ entering_namespace,
+ lxml_element,
+ current_lxml_ns
+ );
}
} else if (node->type == LXB_DOM_NODE_TYPE_TEXT) {
lxb_dom_text_t *text = lxb_dom_interface_text(node);
@@ -157,7 +181,12 @@ static lexbor_libxml2_bridge_status lexbor_libxml2_bridge_convert(lxb_dom_node_t
size_t public_id_len, system_id_len;
const lxb_char_t *public_id = lxb_dom_document_type_public_id(doctype, &public_id_len);
const lxb_char_t *system_id = lxb_dom_document_type_system_id(doctype, &system_id_len);
- xmlDtdPtr lxml_dtd = xmlCreateIntSubset(lxml_doc, name, public_id_len ? public_id : NULL, system_id_len ? system_id : NULL);
+ xmlDtdPtr lxml_dtd = xmlCreateIntSubset(
+ lxml_doc,
+ name,
+ public_id_len ? public_id : NULL,
+ system_id_len ? system_id : NULL
+ );
if (UNEXPECTED(lxml_dtd == NULL)) {
retval = LEXBOR_LIBXML2_BRIDGE_STATUS_OOM;
goto out;
@@ -199,13 +228,22 @@ void lexbor_libxml2_bridge_parse_context_init(lexbor_libxml2_bridge_parse_contex
memset(ctx, 0, sizeof(*ctx));
}
-void lexbor_libxml2_bridge_parse_set_error_callbacks(lexbor_libxml2_bridge_parse_context *ctx, lexbor_libxml2_bridge_tokenizer_error_reporter tokenizer_error_reporter, lexbor_libxml2_bridge_tree_error_reporter tree_error_reporter)
+void lexbor_libxml2_bridge_parse_set_error_callbacks(
+ lexbor_libxml2_bridge_parse_context *ctx,
+ lexbor_libxml2_bridge_tokenizer_error_reporter tokenizer_error_reporter,
+ lexbor_libxml2_bridge_tree_error_reporter tree_error_reporter
+)
{
ctx->tokenizer_error_reporter = tokenizer_error_reporter;
ctx->tree_error_reporter = tree_error_reporter;
}
-lexbor_libxml2_bridge_status lexbor_libxml2_bridge_convert_document(lxb_html_document_t *document, xmlDocPtr *doc_out, bool compact_text_nodes, bool create_default_ns)
+lexbor_libxml2_bridge_status lexbor_libxml2_bridge_convert_document(
+ lxb_html_document_t *document,
+ xmlDocPtr *doc_out,
+ bool compact_text_nodes,
+ bool create_default_ns
+)
{
#ifdef LIBXML_HTML_ENABLED
xmlDocPtr lxml_doc = htmlNewDocNoDtD(NULL, NULL);
@@ -218,7 +256,12 @@ lexbor_libxml2_bridge_status lexbor_libxml2_bridge_convert_document(lxb_html_doc
if (!lxml_doc) {
return LEXBOR_LIBXML2_BRIDGE_STATUS_OOM;
}
- lexbor_libxml2_bridge_status status = lexbor_libxml2_bridge_convert(lxb_dom_interface_node(document)->last_child, lxml_doc, compact_text_nodes, create_default_ns);
+ lexbor_libxml2_bridge_status status = lexbor_libxml2_bridge_convert(
+ lxb_dom_interface_node(document)->last_child,
+ lxml_doc,
+ compact_text_nodes,
+ create_default_ns
+ );
if (status != LEXBOR_LIBXML2_BRIDGE_STATUS_OK) {
xmlFreeDoc(lxml_doc);
return status;
@@ -227,7 +270,14 @@ lexbor_libxml2_bridge_status lexbor_libxml2_bridge_convert_document(lxb_html_doc
return LEXBOR_LIBXML2_BRIDGE_STATUS_OK;
}
-void lexbor_libxml2_bridge_report_errors(const lexbor_libxml2_bridge_parse_context *ctx, lxb_html_parser_t *parser, const lxb_char_t *input_html, size_t chunk_offset, size_t *error_index_offset_tokenizer, size_t *error_index_offset_tree)
+void lexbor_libxml2_bridge_report_errors(
+ const lexbor_libxml2_bridge_parse_context *ctx,
+ lxb_html_parser_t *parser,
+ const lxb_char_t *input_html,
+ size_t chunk_offset,
+ size_t *error_index_offset_tokenizer,
+ size_t *error_index_offset_tree
+)
{
void *error;
@@ -238,7 +288,11 @@ void lexbor_libxml2_bridge_report_errors(const lexbor_libxml2_bridge_parse_conte
/* See https://github.com/lexbor/lexbor/blob/master/source/lexbor/html/tokenizer/error.h */
lxb_html_tokenizer_error_t *token_error = error;
if (ctx->tokenizer_error_reporter) {
- ctx->tokenizer_error_reporter(ctx->application_data, token_error, token_error->pos - input_html + chunk_offset);
+ ctx->tokenizer_error_reporter(
+ ctx->application_data,
+ token_error,
+ token_error->pos - input_html + chunk_offset
+ );
}
index++;
}
@@ -251,7 +305,13 @@ void lexbor_libxml2_bridge_report_errors(const lexbor_libxml2_bridge_parse_conte
/* See https://github.com/lexbor/lexbor/blob/master/source/lexbor/html/tree/error.h */
lxb_html_tree_error_t *tree_error = error;
if (ctx->tree_error_reporter) {
- ctx->tree_error_reporter(ctx->application_data, tree_error, tree_error->line + 1, tree_error->column + 1, tree_error->length);
+ ctx->tree_error_reporter(
+ ctx->application_data,
+ tree_error,
+ tree_error->line + 1,
+ tree_error->column + 1,
+ tree_error->length
+ );
}
index++;
}
diff --git a/ext/dom/html5_parser.h b/ext/dom/html5_parser.h
index e0e5b7b55cbf6..a3e7d16c5c4a6 100644
--- a/ext/dom/html5_parser.h
+++ b/ext/dom/html5_parser.h
@@ -14,8 +14,8 @@
+----------------------------------------------------------------------+
*/
-#ifndef CONVERT_H
-#define CONVERT_H
+#ifndef HTML5_PARSER_H
+#define HTML5_PARSER_H
#include
#include
@@ -29,8 +29,18 @@ typedef enum {
LEXBOR_LIBXML2_BRIDGE_STATUS_OOM,
} lexbor_libxml2_bridge_status;
-typedef void (*lexbor_libxml2_bridge_tokenizer_error_reporter)(void *application_data, lxb_html_tokenizer_error_t *error, size_t offset);
-typedef void (*lexbor_libxml2_bridge_tree_error_reporter)(void *application_data, lxb_html_tree_error_t *error, size_t line, size_t column, size_t len);
+typedef void (*lexbor_libxml2_bridge_tokenizer_error_reporter)(
+ void *application_data,
+ lxb_html_tokenizer_error_t *error,
+ size_t offset
+);
+typedef void (*lexbor_libxml2_bridge_tree_error_reporter)(
+ void *application_data,
+ lxb_html_tree_error_t *error,
+ size_t line,
+ size_t column,
+ size_t len
+);
typedef struct {
bool has_explicit_html_tag;
@@ -49,9 +59,28 @@ typedef struct {
} lexbor_libxml2_bridge_parse_context;
void lexbor_libxml2_bridge_parse_context_init(lexbor_libxml2_bridge_parse_context *ctx);
-void lexbor_libxml2_bridge_parse_set_error_callbacks(lexbor_libxml2_bridge_parse_context *ctx, lexbor_libxml2_bridge_tokenizer_error_reporter tokenizer_error_reporter, lexbor_libxml2_bridge_tree_error_reporter tree_error_reporter);
-lexbor_libxml2_bridge_status lexbor_libxml2_bridge_convert_document(lxb_html_document_t *document, xmlDocPtr *doc_out, bool compact_text_nodes, bool create_default_ns);
-void lexbor_libxml2_bridge_report_errors(const lexbor_libxml2_bridge_parse_context *ctx, lxb_html_parser_t *parser, const lxb_char_t *input_html, size_t chunk_offset, size_t *error_index_offset_tokenizer, size_t *error_index_offset_tree);
-void lexbor_libxml2_bridge_copy_observations(lxb_html_tree_t *tree, lexbor_libxml2_bridge_extracted_observations *observations);
+void lexbor_libxml2_bridge_parse_set_error_callbacks(
+ lexbor_libxml2_bridge_parse_context *ctx,
+ lexbor_libxml2_bridge_tokenizer_error_reporter tokenizer_error_reporter,
+ lexbor_libxml2_bridge_tree_error_reporter tree_error_reporter
+);
+lexbor_libxml2_bridge_status lexbor_libxml2_bridge_convert_document(
+ lxb_html_document_t *document,
+ xmlDocPtr *doc_out,
+ bool compact_text_nodes,
+ bool create_default_ns
+);
+void lexbor_libxml2_bridge_report_errors(
+ const lexbor_libxml2_bridge_parse_context *ctx,
+ lxb_html_parser_t *parser,
+ const lxb_char_t *input_html,
+ size_t chunk_offset,
+ size_t *error_index_offset_tokenizer,
+ size_t *error_index_offset_tree
+);
+void lexbor_libxml2_bridge_copy_observations(
+ lxb_html_tree_t *tree,
+ lexbor_libxml2_bridge_extracted_observations *observations
+);
#endif
diff --git a/ext/dom/html5_serializer.c b/ext/dom/html5_serializer.c
index ed5f94089d90f..f0d43f09afbae 100644
--- a/ext/dom/html5_serializer.c
+++ b/ext/dom/html5_serializer.c
@@ -336,7 +336,10 @@ static zend_result dom_html5_serialize_node(dom_html5_serialize_context *ctx, co
zend_result dom_html5_serialize(dom_html5_serialize_context *ctx, const xmlNode *node)
{
/* Step 1. Note that this algorithm serializes children. Only elements, documents, and fragments can have children. */
- if (node->type != XML_ELEMENT_NODE && node->type != XML_DOCUMENT_FRAG_NODE && node->type != XML_DOCUMENT_NODE && node->type != XML_HTML_DOCUMENT_NODE) {
+ if (node->type != XML_ELEMENT_NODE
+ && node->type != XML_DOCUMENT_FRAG_NODE
+ && node->type != XML_DOCUMENT_NODE
+ && node->type != XML_HTML_DOCUMENT_NODE) {
return SUCCESS;
}
if (node->type == XML_ELEMENT_NODE && dom_html5_serializes_as_void(node)) {
diff --git a/ext/dom/html_document.c b/ext/dom/html_document.c
index 6f05b342cbbfe..cbca349fa3f36 100644
--- a/ext/dom/html_document.c
+++ b/ext/dom/html_document.c
@@ -67,7 +67,8 @@ typedef struct {
} dom_output_ctx;
typedef struct {
- /* We can skip some conversion if the input and output encoding are both UTF-8, we only have to validate and substitute replacement characters */
+ /* We can skip some conversion if the input and output encoding are both UTF-8,
+ * we only have to validate and substitute replacement characters */
bool fast_path; /* Put first, near the encode & decode structures, for cache locality */
lxb_encoding_encode_t encode;
lxb_encoding_decode_t decode;
@@ -83,7 +84,12 @@ static void dom_decoding_encoding_ctx_init(dom_decoding_encoding_ctx *ctx)
ctx->decode_data = NULL;
/* Set fast path on by default so that the decoder finishing is skipped if this was never initialised properly. */
ctx->fast_path = true;
- (void) lxb_encoding_encode_init(&ctx->encode, ctx->encode_data, ctx->encoding_output, sizeof(ctx->encoding_output) / sizeof(*ctx->encoding_output));
+ (void) lxb_encoding_encode_init(
+ &ctx->encode,
+ ctx->encode_data,
+ ctx->encoding_output,
+ sizeof(ctx->encoding_output) / sizeof(*ctx->encoding_output)
+ );
(void) lxb_encoding_encode_replace_set(&ctx->encode, LXB_ENCODING_REPLACEMENT_BYTES, LXB_ENCODING_REPLACEMENT_SIZE);
}
@@ -204,7 +210,11 @@ static void dom_reset_line_column_cache(dom_line_column_cache *cache)
cache->last_offset = 0;
}
-static void dom_find_line_and_column_using_cache(const dom_lexbor_libxml2_bridge_application_data *application_data, dom_line_column_cache *cache, size_t offset)
+static void dom_find_line_and_column_using_cache(
+ const dom_lexbor_libxml2_bridge_application_data *application_data,
+ dom_line_column_cache *cache,
+ size_t offset
+)
{
offset -= application_data->current_total_offset;
if (offset > application_data->current_input_length) {
@@ -243,14 +253,24 @@ static void dom_find_line_and_column_using_cache(const dom_lexbor_libxml2_bridge
}
}
-static void dom_lexbor_libxml2_bridge_tokenizer_error_reporter(void *application_data_voidptr, lxb_html_tokenizer_error_t *error, size_t offset)
+static void dom_lexbor_libxml2_bridge_tokenizer_error_reporter(
+ void *application_data_voidptr,
+ lxb_html_tokenizer_error_t *error,
+ size_t offset
+)
{
dom_lexbor_libxml2_bridge_application_data *application_data = application_data_voidptr;
dom_find_line_and_column_using_cache(application_data, &application_data->cache_tokenizer, offset);
php_libxml_pretend_ctx_error_ex(application_data->input_name, application_data->cache_tokenizer.last_line, application_data->cache_tokenizer.last_column, "tokenizer error %s in %s, line: %zu, column: %zu\n", dom_lexbor_tokenizer_error_code_to_string(error->id), application_data->input_name, application_data->cache_tokenizer.last_line, application_data->cache_tokenizer.last_column);
}
-static void dom_lexbor_libxml2_bridge_tree_error_reporter(void *application_data_voidptr, lxb_html_tree_error_t *error, size_t line, size_t column, size_t len)
+static void dom_lexbor_libxml2_bridge_tree_error_reporter(
+ void *application_data_voidptr,
+ lxb_html_tree_error_t *error,
+ size_t line,
+ size_t column,
+ size_t len
+)
{
dom_lexbor_libxml2_bridge_application_data *application_data = application_data_voidptr;
@@ -261,9 +281,28 @@ static void dom_lexbor_libxml2_bridge_tree_error_reporter(void *application_data
if (UNEXPECTED(len <= 1)) {
/* Possible with EOF, or single-character tokens, don't use a range in the error display in this case */
- php_libxml_pretend_ctx_error_ex(application_data->input_name, line, column, "tree error %s in %s, line: %zu, column: %zu\n", dom_lexbor_tree_error_code_to_string(error->id), application_data->input_name, line, column);
+ php_libxml_pretend_ctx_error_ex(
+ application_data->input_name,
+ line,
+ column,
+ "tree error %s in %s, line: %zu, column: %zu\n",
+ dom_lexbor_tree_error_code_to_string(error->id),
+ application_data->input_name,
+ line,
+ column
+ );
} else {
- php_libxml_pretend_ctx_error_ex(application_data->input_name, line, column, "tree error %s in %s, line: %zu, column: %zu-%zu\n", dom_lexbor_tree_error_code_to_string(error->id), application_data->input_name, line, column, column + len - 1);
+ php_libxml_pretend_ctx_error_ex(
+ application_data->input_name,
+ line,
+ column,
+ "tree error %s in %s, line: %zu, column: %zu-%zu\n",
+ dom_lexbor_tree_error_code_to_string(error->id),
+ application_data->input_name,
+ line,
+ column,
+ column + len - 1
+ );
}
}
@@ -296,7 +335,11 @@ static void dom_place_remove_element_and_hoist_children(xmlNodePtr parent, const
}
}
-static void dom_post_process_html5_loading(xmlDocPtr lxml_doc, zend_long options, const lexbor_libxml2_bridge_extracted_observations *observations)
+static void dom_post_process_html5_loading(
+ xmlDocPtr lxml_doc,
+ zend_long options,
+ const lexbor_libxml2_bridge_extracted_observations *observations
+)
{
if (options & HTML_PARSE_NOIMPLIED) {
xmlNodePtr html_node = dom_search_child((xmlNodePtr) lxml_doc, "html");
@@ -317,7 +360,8 @@ static void dom_post_process_html5_loading(xmlDocPtr lxml_doc, zend_long options
if (!(options & DOM_HTML_NO_DEFAULT_NS) && EXPECTED(lxml_doc->children != NULL)) {
xmlNodePtr node = lxml_doc->children;
while (node) {
- /* Fine to use the DOM wrap reconciliation here because it's the "modern" world of DOM, and no user manipulation happened yet. */
+ /* Fine to use the DOM wrap reconciliation here because it's the "modern" world of DOM,
+ * and no user manipulation happened yet. */
xmlDOMWrapCtxt dummy_ctxt = {0};
xmlDOMWrapReconcileNamespaces(&dummy_ctxt, node, /* options */ 0);
node = node->next;
@@ -389,9 +433,19 @@ static void dom_setup_parser_encoding_manually(const lxb_char_t *buf_start, cons
decoding_encoding_ctx->decode_data = encoding_data;
- (void) lxb_encoding_decode_init(&decoding_encoding_ctx->decode, decoding_encoding_ctx->decode_data, decoding_encoding_ctx->codepoints, sizeof(decoding_encoding_ctx->codepoints) / sizeof(*decoding_encoding_ctx->codepoints));
- (void) lxb_encoding_decode_replace_set(&decoding_encoding_ctx->decode, &replacement_codepoint, LXB_ENCODING_REPLACEMENT_BUFFER_LEN);
- decoding_encoding_ctx->fast_path = decoding_encoding_ctx->decode_data == decoding_encoding_ctx->encode_data; /* Note: encode_data is for UTF-8 */
+ (void) lxb_encoding_decode_init(
+ &decoding_encoding_ctx->decode,
+ decoding_encoding_ctx->decode_data,
+ decoding_encoding_ctx->codepoints,
+ sizeof(decoding_encoding_ctx->codepoints) / sizeof(*decoding_encoding_ctx->codepoints)
+ );
+ (void) lxb_encoding_decode_replace_set(
+ &decoding_encoding_ctx->decode,
+ &replacement_codepoint,
+ LXB_ENCODING_REPLACEMENT_BUFFER_LEN
+ );
+ /* Note: encode_data is for UTF-8 */
+ decoding_encoding_ctx->fast_path = decoding_encoding_ctx->decode_data == decoding_encoding_ctx->encode_data;
if (decoding_encoding_ctx->fast_path) {
application_data->current_input_codepoints = NULL;
@@ -402,7 +456,12 @@ static void dom_setup_parser_encoding_manually(const lxb_char_t *buf_start, cons
}
}
-static void dom_setup_parser_encoding_implicitly(const lxb_char_t **buf_ref, size_t *read, dom_decoding_encoding_ctx *decoding_encoding_ctx, dom_lexbor_libxml2_bridge_application_data *application_data)
+static void dom_setup_parser_encoding_implicitly(
+ const lxb_char_t **buf_ref,
+ size_t *read,
+ dom_decoding_encoding_ctx *decoding_encoding_ctx,
+ dom_lexbor_libxml2_bridge_application_data *application_data
+)
{
const char *buf_start = (const char *) *buf_ref;
dom_character_encoding_data dom_encoding_data = dom_determine_encoding(buf_start, *read);
@@ -411,7 +470,16 @@ static void dom_setup_parser_encoding_implicitly(const lxb_char_t **buf_ref, siz
dom_setup_parser_encoding_manually((const lxb_char_t *) buf_start, dom_encoding_data.encoding_data, decoding_encoding_ctx, application_data);
}
-static bool dom_process_parse_chunk(lexbor_libxml2_bridge_parse_context *ctx, lxb_html_document_t *document, lxb_html_parser_t *parser, size_t encoded_length, const lxb_char_t *encoding_output, size_t input_buffer_length, size_t *tokenizer_error_offset, size_t *tree_error_offset)
+static bool dom_process_parse_chunk(
+ lexbor_libxml2_bridge_parse_context *ctx,
+ lxb_html_document_t *document,
+ lxb_html_parser_t *parser,
+ size_t encoded_length,
+ const lxb_char_t *encoding_output,
+ size_t input_buffer_length,
+ size_t *tokenizer_error_offset,
+ size_t *tree_error_offset
+)
{
dom_lexbor_libxml2_bridge_application_data *application_data = ctx->application_data;
application_data->current_input_length = input_buffer_length;
@@ -426,7 +494,16 @@ static bool dom_process_parse_chunk(lexbor_libxml2_bridge_parse_context *ctx, lx
return true;
}
-static bool dom_decode_encode_fast_path(lexbor_libxml2_bridge_parse_context *ctx, lxb_html_document_t *document, lxb_html_parser_t *parser, const lxb_char_t **buf_ref_ref, const lxb_char_t *buf_end, dom_decoding_encoding_ctx *decoding_encoding_ctx, size_t *tokenizer_error_offset, size_t *tree_error_offset)
+static bool dom_decode_encode_fast_path(
+ lexbor_libxml2_bridge_parse_context *ctx,
+ lxb_html_document_t *document,
+ lxb_html_parser_t *parser,
+ const lxb_char_t **buf_ref_ref,
+ const lxb_char_t *buf_end,
+ dom_decoding_encoding_ctx *decoding_encoding_ctx,
+ size_t *tokenizer_error_offset,
+ size_t *tree_error_offset
+)
{
const lxb_char_t *buf_ref = *buf_ref_ref;
const lxb_char_t *last_output = buf_ref;
@@ -482,7 +559,16 @@ static bool dom_decode_encode_fast_path(lexbor_libxml2_bridge_parse_context *ctx
return false;
}
-static bool dom_decode_encode_slow_path(lexbor_libxml2_bridge_parse_context *ctx, lxb_html_document_t *document, lxb_html_parser_t *parser, const lxb_char_t **buf_ref_ref, const lxb_char_t *buf_end, dom_decoding_encoding_ctx *decoding_encoding_ctx, size_t *tokenizer_error_offset, size_t *tree_error_offset)
+static bool dom_decode_encode_slow_path(
+ lexbor_libxml2_bridge_parse_context *ctx,
+ lxb_html_document_t *document,
+ lxb_html_parser_t *parser,
+ const lxb_char_t **buf_ref_ref,
+ const lxb_char_t *buf_end,
+ dom_decoding_encoding_ctx *decoding_encoding_ctx,
+ size_t *tokenizer_error_offset,
+ size_t *tree_error_offset
+)
{
const lxb_char_t *buf_ref = *buf_ref_ref;
lexbor_status_t decode_status, encode_status;
@@ -518,16 +604,50 @@ static bool dom_decode_encode_slow_path(lexbor_libxml2_bridge_parse_context *ctx
return false;
}
-static bool dom_parse_decode_encode_step(lexbor_libxml2_bridge_parse_context *ctx, lxb_html_document_t *document, lxb_html_parser_t *parser, const lxb_char_t **buf_ref_ref, const lxb_char_t *buf_end, dom_decoding_encoding_ctx *decoding_encoding_ctx, size_t *tokenizer_error_offset, size_t *tree_error_offset)
+static bool dom_parse_decode_encode_step(
+ lexbor_libxml2_bridge_parse_context *ctx,
+ lxb_html_document_t *document,
+ lxb_html_parser_t *parser,
+ const lxb_char_t **buf_ref_ref,
+ const lxb_char_t *buf_end,
+ dom_decoding_encoding_ctx *decoding_encoding_ctx,
+ size_t *tokenizer_error_offset,
+ size_t *tree_error_offset
+)
{
if (decoding_encoding_ctx->fast_path) {
- return dom_decode_encode_fast_path(ctx, document, parser, buf_ref_ref, buf_end, decoding_encoding_ctx, tokenizer_error_offset, tree_error_offset);
+ return dom_decode_encode_fast_path(
+ ctx,
+ document,
+ parser,
+ buf_ref_ref,
+ buf_end,
+ decoding_encoding_ctx,
+ tokenizer_error_offset,
+ tree_error_offset
+ );
} else {
- return dom_decode_encode_slow_path(ctx, document, parser, buf_ref_ref, buf_end, decoding_encoding_ctx, tokenizer_error_offset, tree_error_offset);
+ return dom_decode_encode_slow_path(
+ ctx,
+ document,
+ parser,
+ buf_ref_ref,
+ buf_end,
+ decoding_encoding_ctx,
+ tokenizer_error_offset,
+ tree_error_offset
+ );
}
}
-static bool dom_parse_decode_encode_finish(lexbor_libxml2_bridge_parse_context *ctx, lxb_html_document_t *document, lxb_html_parser_t *parser, dom_decoding_encoding_ctx *decoding_encoding_ctx, size_t *tokenizer_error_offset, size_t *tree_error_offset)
+static bool dom_parse_decode_encode_finish(
+ lexbor_libxml2_bridge_parse_context *ctx,
+ lxb_html_document_t *document,
+ lxb_html_parser_t *parser,
+ dom_decoding_encoding_ctx *decoding_encoding_ctx,
+ size_t *tokenizer_error_offset,
+ size_t *tree_error_offset
+)
{
if (!decoding_encoding_ctx->fast_path) {
/* Fast path handles codepoints one by one, so this part is not applicable in that case */
@@ -572,7 +692,11 @@ static bool check_options_validity(uint32_t arg_num, zend_long options)
{
const zend_long VALID_OPTIONS = XML_PARSE_NOERROR | XML_PARSE_COMPACT | HTML_PARSE_NOIMPLIED | DOM_HTML_NO_DEFAULT_NS;
if ((options & ~VALID_OPTIONS) != 0) {
- zend_argument_value_error(arg_num, "contains invalid flags (allowed flags: LIBXML_NOERROR, LIBXML_COMPACT, LIBXML_HTML_NOIMPLIED, DOM\\NO_DEFAULT_NS)");
+ zend_argument_value_error(arg_num, "contains invalid flags (allowed flags: "
+ "LIBXML_NOERROR, "
+ "LIBXML_COMPACT, "
+ "LIBXML_HTML_NOIMPLIED, "
+ "DOM\\NO_DEFAULT_NS)");
return false;
}
return true;
@@ -608,7 +732,12 @@ PHP_METHOD(DOM_HTMLDocument, createEmpty)
lxml_doc->encoding = xmlStrdup((const xmlChar *) encoding);
- dom_object *intern = php_dom_instantiate_object_helper(return_value, dom_html_document_class_entry, (xmlNodePtr) lxml_doc, NULL);
+ dom_object *intern = php_dom_instantiate_object_helper(
+ return_value,
+ dom_html_document_class_entry,
+ (xmlNodePtr) lxml_doc,
+ NULL
+ );
intern->document->is_modern_api_class = true;
return;
@@ -622,7 +751,15 @@ PHP_METHOD(DOM_HTMLDocument, createFromString)
const char *source, *override_encoding = NULL;
size_t source_len, override_encoding_len;
zend_long options = 0;
- if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|lp!", &source, &source_len, &options, &override_encoding, &override_encoding_len) == FAILURE) {
+ if (zend_parse_parameters(
+ ZEND_NUM_ARGS(),
+ "s|lp!",
+ &source,
+ &source_len,
+ &options,
+ &override_encoding,
+ &override_encoding_len
+ ) == FAILURE) {
RETURN_THROWS();
}
@@ -638,7 +775,11 @@ PHP_METHOD(DOM_HTMLDocument, createFromString)
lexbor_libxml2_bridge_parse_context ctx;
lexbor_libxml2_bridge_parse_context_init(&ctx);
if (!(options & XML_PARSE_NOERROR)) {
- lexbor_libxml2_bridge_parse_set_error_callbacks(&ctx, dom_lexbor_libxml2_bridge_tokenizer_error_reporter, dom_lexbor_libxml2_bridge_tree_error_reporter);
+ lexbor_libxml2_bridge_parse_set_error_callbacks(
+ &ctx,
+ dom_lexbor_libxml2_bridge_tokenizer_error_reporter,
+ dom_lexbor_libxml2_bridge_tree_error_reporter
+ );
}
ctx.application_data = &application_data;
@@ -650,7 +791,10 @@ PHP_METHOD(DOM_HTMLDocument, createFromString)
dom_decoding_encoding_ctx decoding_encoding_ctx;
dom_decoding_encoding_ctx_init(&decoding_encoding_ctx);
if (override_encoding != NULL) {
- const lxb_encoding_data_t *encoding_data = lxb_encoding_data_by_name((const lxb_char_t *) override_encoding, override_encoding_len);
+ const lxb_encoding_data_t *encoding_data = lxb_encoding_data_by_name(
+ (const lxb_char_t *) override_encoding,
+ override_encoding_len
+ );
if (!encoding_data) {
zend_argument_value_error(3, "must be a valid document encoding");
RETURN_THROWS();
@@ -681,7 +825,16 @@ PHP_METHOD(DOM_HTMLDocument, createFromString)
source_len -= chunk_size;
const lxb_char_t *buf_end = buf_ref + chunk_size;
- bool result = dom_parse_decode_encode_step(&ctx, document, parser, &buf_ref, buf_end, &decoding_encoding_ctx, &tokenizer_error_offset, &tree_error_offset);
+ bool result = dom_parse_decode_encode_step(
+ &ctx,
+ document,
+ parser,
+ &buf_ref,
+ buf_end,
+ &decoding_encoding_ctx,
+ &tokenizer_error_offset,
+ &tree_error_offset
+ );
if (!result) {
goto fail_oom;
}
@@ -697,10 +850,20 @@ PHP_METHOD(DOM_HTMLDocument, createFromString)
}
xmlDocPtr lxml_doc;
- lexbor_libxml2_bridge_status bridge_status = lexbor_libxml2_bridge_convert_document(document, &lxml_doc, options & XML_PARSE_COMPACT, !(options & DOM_HTML_NO_DEFAULT_NS));
+ lexbor_libxml2_bridge_status bridge_status = lexbor_libxml2_bridge_convert_document(
+ document,
+ &lxml_doc,
+ options & XML_PARSE_COMPACT,
+ !(options & DOM_HTML_NO_DEFAULT_NS)
+ );
lexbor_libxml2_bridge_copy_observations(parser->tree, &ctx.observations);
if (UNEXPECTED(bridge_status != LEXBOR_LIBXML2_BRIDGE_STATUS_OK)) {
- php_libxml_ctx_error(NULL, "%s in %s", dom_lexbor_libxml2_bridge_status_code_to_string(bridge_status), application_data.input_name);
+ php_libxml_ctx_error(
+ NULL,
+ "%s in %s",
+ dom_lexbor_libxml2_bridge_status_code_to_string(bridge_status),
+ application_data.input_name
+ );
lxb_html_document_destroy(document);
RETURN_FALSE;
}
@@ -714,7 +877,12 @@ PHP_METHOD(DOM_HTMLDocument, createFromString)
lxml_doc->encoding = xmlStrdup((const xmlChar *) "UTF-8");
}
- dom_object *intern = php_dom_instantiate_object_helper(return_value, dom_html_document_class_entry, (xmlNodePtr) lxml_doc, NULL);
+ dom_object *intern = php_dom_instantiate_object_helper(
+ return_value,
+ dom_html_document_class_entry,
+ (xmlNodePtr) lxml_doc,
+ NULL
+ );
intern->document->is_modern_api_class = true;
return;
@@ -730,7 +898,15 @@ PHP_METHOD(DOM_HTMLDocument, createFromFile)
size_t filename_len, override_encoding_len;
zend_long options = 0;
php_stream *stream = NULL;
- if (zend_parse_parameters(ZEND_NUM_ARGS(), "p|lp!", &filename, &filename_len, &options, &override_encoding, &override_encoding_len) == FAILURE) {
+ if (zend_parse_parameters(
+ ZEND_NUM_ARGS(),
+ "p|lp!",
+ &filename,
+ &filename_len,
+ &options,
+ &override_encoding,
+ &override_encoding_len
+ ) == FAILURE) {
RETURN_THROWS();
}
@@ -752,7 +928,11 @@ PHP_METHOD(DOM_HTMLDocument, createFromFile)
lexbor_libxml2_bridge_parse_context ctx;
lexbor_libxml2_bridge_parse_context_init(&ctx);
if (!(options & XML_PARSE_NOERROR)) {
- lexbor_libxml2_bridge_parse_set_error_callbacks(&ctx, dom_lexbor_libxml2_bridge_tokenizer_error_reporter, dom_lexbor_libxml2_bridge_tree_error_reporter);
+ lexbor_libxml2_bridge_parse_set_error_callbacks(
+ &ctx,
+ dom_lexbor_libxml2_bridge_tokenizer_error_reporter,
+ dom_lexbor_libxml2_bridge_tree_error_reporter
+ );
}
ctx.application_data = &application_data;
@@ -763,7 +943,10 @@ PHP_METHOD(DOM_HTMLDocument, createFromFile)
dom_decoding_encoding_ctx_init(&decoding_encoding_ctx);
bool should_determine_encoding_implicitly = true; /* First read => determine encoding implicitly */
if (override_encoding != NULL) {
- const lxb_encoding_data_t *encoding_data = lxb_encoding_data_by_name((const lxb_char_t *) override_encoding, override_encoding_len);
+ const lxb_encoding_data_t *encoding_data = lxb_encoding_data_by_name(
+ (const lxb_char_t *) override_encoding,
+ override_encoding_len
+ );
if (!encoding_data) {
zend_argument_value_error(3, "must be a valid document encoding");
RETURN_THROWS();
@@ -772,7 +955,7 @@ PHP_METHOD(DOM_HTMLDocument, createFromFile)
dom_setup_parser_encoding_manually((const lxb_char_t *) buf, encoding_data, &decoding_encoding_ctx, &application_data);
}
- stream = php_stream_open_wrapper_ex(filename, "rb", REPORT_ERRORS, /* opened_path */ NULL, /* context */ php_libxml_get_stream_context());
+ stream = php_stream_open_wrapper_ex(filename, "rb", REPORT_ERRORS, /* opened_path */ NULL, php_libxml_get_stream_context());
if (!stream) {
if (!EG(exception)) {
zend_throw_exception_ex(NULL, 0, "Cannot open file '%s'", filename);
@@ -784,10 +967,18 @@ PHP_METHOD(DOM_HTMLDocument, createFromFile)
if (should_determine_encoding_implicitly) {
zend_string *charset = php_libxml_sniff_charset_from_stream(stream);
if (charset != NULL) {
- const lxb_encoding_data_t *encoding_data = lxb_encoding_data_by_name((const lxb_char_t *) ZSTR_VAL(charset), ZSTR_LEN(charset));
+ const lxb_encoding_data_t *encoding_data = lxb_encoding_data_by_name(
+ (const lxb_char_t *) ZSTR_VAL(charset),
+ ZSTR_LEN(charset)
+ );
if (encoding_data != NULL) {
should_determine_encoding_implicitly = false;
- dom_setup_parser_encoding_manually((const lxb_char_t *) buf, encoding_data, &decoding_encoding_ctx, &application_data);
+ dom_setup_parser_encoding_manually(
+ (const lxb_char_t *) buf,
+ encoding_data,
+ &decoding_encoding_ctx,
+ &application_data
+ );
}
zend_string_release_ex(charset, false);
}
@@ -817,7 +1008,16 @@ PHP_METHOD(DOM_HTMLDocument, createFromFile)
}
const lxb_char_t *buf_end = buf_ref + read;
- bool result = dom_parse_decode_encode_step(&ctx, document, parser, &buf_ref, buf_end, &decoding_encoding_ctx, &tokenizer_error_offset, &tree_error_offset);
+ bool result = dom_parse_decode_encode_step(
+ &ctx,
+ document,
+ parser,
+ &buf_ref,
+ buf_end,
+ &decoding_encoding_ctx,
+ &tokenizer_error_offset,
+ &tree_error_offset
+ );
if (!result) {
goto fail_oom;
}
@@ -833,7 +1033,12 @@ PHP_METHOD(DOM_HTMLDocument, createFromFile)
}
xmlDocPtr lxml_doc;
- lexbor_libxml2_bridge_status bridge_status = lexbor_libxml2_bridge_convert_document(document, &lxml_doc, options & XML_PARSE_COMPACT, !(options & DOM_HTML_NO_DEFAULT_NS));
+ lexbor_libxml2_bridge_status bridge_status = lexbor_libxml2_bridge_convert_document(
+ document,
+ &lxml_doc,
+ options & XML_PARSE_COMPACT,
+ !(options & DOM_HTML_NO_DEFAULT_NS)
+ );
lexbor_libxml2_bridge_copy_observations(parser->tree, &ctx.observations);
if (UNEXPECTED(bridge_status != LEXBOR_LIBXML2_BRIDGE_STATUS_OK)) {
php_libxml_ctx_error(NULL, "%s in %s", dom_lexbor_libxml2_bridge_status_code_to_string(bridge_status), filename);
@@ -880,7 +1085,12 @@ PHP_METHOD(DOM_HTMLDocument, createFromFile)
php_stream_close(stream);
- dom_object *intern = php_dom_instantiate_object_helper(return_value, dom_html_document_class_entry, (xmlNodePtr) lxml_doc, NULL);
+ dom_object *intern = php_dom_instantiate_object_helper(
+ return_value,
+ dom_html_document_class_entry,
+ (xmlNodePtr) lxml_doc,
+ NULL
+ );
intern->document->is_modern_api_class = true;
return;
@@ -922,7 +1132,11 @@ static zend_result dom_saveHTML_write_string_len(void *application_data, const c
const lxb_codepoint_t *codepoints_end = codepoints_ref + lxb_encoding_decode_buf_used(output->decode);
do {
encode_status = output->encoding_data->encode(output->encode, &codepoints_ref, codepoints_end);
- if (UNEXPECTED(output->write_output(output->output_data, (const char *) output->encoding_output, lxb_encoding_encode_buf_used(output->encode)) != SUCCESS)) {
+ if (UNEXPECTED(output->write_output(
+ output->output_data,
+ (const char *) output->encoding_output,
+ lxb_encoding_encode_buf_used(output->encode)
+ ) != SUCCESS)) {
return FAILURE;
}
lxb_encoding_encode_buf_used_set(output->encode, 0);
@@ -942,7 +1156,10 @@ static zend_result dom_common_save(dom_output_ctx *output_ctx, const xmlDoc *doc
{
/* Initialize everything related to encoding & decoding */
const lxb_encoding_data_t *decoding_data = lxb_encoding_data(LXB_ENCODING_UTF_8);
- const lxb_encoding_data_t *encoding_data = lxb_encoding_data_by_name((const lxb_char_t *) docp->encoding, strlen((const char *) docp->encoding));
+ const lxb_encoding_data_t *encoding_data = lxb_encoding_data_by_name(
+ (const lxb_char_t *) docp->encoding,
+ strlen((const char *) docp->encoding)
+ );
lxb_encoding_encode_t encode;
lxb_encoding_decode_t decode;
lxb_char_t encoding_output[4096];
@@ -976,13 +1193,21 @@ static zend_result dom_common_save(dom_output_ctx *output_ctx, const xmlDoc *doc
if (lxb_encoding_decode_buf_used(&decode)) {
const lxb_codepoint_t *codepoints_ref = (const lxb_codepoint_t *) codepoints;
(void) encoding_data->encode(&encode, &codepoints_ref, codepoints_ref + lxb_encoding_decode_buf_used(&decode));
- if (UNEXPECTED(output_ctx->write_output(output_ctx->output_data, (const char *) encoding_output, lxb_encoding_encode_buf_used(&encode)) != SUCCESS)) {
+ if (UNEXPECTED(output_ctx->write_output(
+ output_ctx->output_data,
+ (const char *) encoding_output,
+ lxb_encoding_encode_buf_used(&encode)) != SUCCESS
+ )) {
return FAILURE;
}
}
(void) lxb_encoding_encode_finish(&encode);
if (lxb_encoding_encode_buf_used(&encode)) {
- if (UNEXPECTED(output_ctx->write_output(output_ctx->output_data, (const char *) encoding_output, lxb_encoding_encode_buf_used(&encode)) != SUCCESS)) {
+ if (UNEXPECTED(output_ctx->write_output(
+ output_ctx->output_data,
+ (const char *) encoding_output,
+ lxb_encoding_encode_buf_used(&encode)) != SUCCESS
+ )) {
return FAILURE;
}
}
@@ -1008,7 +1233,7 @@ PHP_METHOD(DOM_HTMLDocument, saveHTMLFile)
RETURN_THROWS();
}
- php_stream *stream = php_stream_open_wrapper_ex(file, "wb", REPORT_ERRORS, /* opened_path */ NULL, /* context */ php_libxml_get_stream_context());
+ php_stream *stream = php_stream_open_wrapper_ex(file, "wb", REPORT_ERRORS, /* opened_path */ NULL, php_libxml_get_stream_context());
if (!stream) {
RETURN_FALSE;
}
diff --git a/ext/dom/xml_document.c b/ext/dom/xml_document.c
index 78ebb5d6c503f..bd973ece28309 100644
--- a/ext/dom/xml_document.c
+++ b/ext/dom/xml_document.c
@@ -25,9 +25,38 @@
static bool check_options_validity(uint32_t arg_num, zend_long options)
{
- const zend_long VALID_OPTIONS = XML_PARSE_NOENT | XML_PARSE_DTDLOAD | XML_PARSE_DTDATTR | XML_PARSE_DTDVALID | XML_PARSE_NOERROR | XML_PARSE_NOWARNING | XML_PARSE_NOBLANKS | XML_PARSE_XINCLUDE | XML_PARSE_NSCLEAN | XML_PARSE_NOCDATA | XML_PARSE_NONET | XML_PARSE_PEDANTIC | XML_PARSE_COMPACT | XML_PARSE_HUGE | XML_PARSE_BIG_LINES;
+ const zend_long VALID_OPTIONS = XML_PARSE_NOENT
+ | XML_PARSE_DTDLOAD
+ | XML_PARSE_DTDATTR
+ | XML_PARSE_DTDVALID
+ | XML_PARSE_NOERROR
+ | XML_PARSE_NOWARNING
+ | XML_PARSE_NOBLANKS
+ | XML_PARSE_XINCLUDE
+ | XML_PARSE_NSCLEAN
+ | XML_PARSE_NOCDATA
+ | XML_PARSE_NONET
+ | XML_PARSE_PEDANTIC
+ | XML_PARSE_COMPACT
+ | XML_PARSE_HUGE
+ | XML_PARSE_BIG_LINES;
if ((options & ~VALID_OPTIONS) != 0) {
- zend_argument_value_error(2, "contains invalid flags (allowed flags: LIBXML_NOENT, LIBXML_DTDLOAD, LIBXML_DTDATTR, LIBXML_DTDVALID, LIBXML_NOERROR, LIBXML_NOWARNING, LIBXML_NOBLANKS, LIBXML_XINCLUDE, LIBXML_NSCLEAN, LIBXML_NOCDATA, LIBXML_NONET, LIBXML_PEDANTIC, LIBXML_COMPACT, LIBXML_PARSEHUGE, LIBXML_BIGLINES)");
+ zend_argument_value_error(2, "contains invalid flags (allowed flags: "
+ "LIBXML_NOENT, "
+ "LIBXML_DTDLOAD, "
+ "LIBXML_DTDATTR, "
+ "LIBXML_DTDVALID, "
+ "LIBXML_NOERROR, "
+ "LIBXML_NOWARNING, "
+ "LIBXML_NOBLANKS, "
+ "LIBXML_XINCLUDE, "
+ "LIBXML_NSCLEAN, "
+ "LIBXML_NOCDATA, "
+ "LIBXML_NONET, "
+ "LIBXML_PEDANTIC, "
+ "LIBXML_COMPACT, "
+ "LIBXML_PARSEHUGE, "
+ "LIBXML_BIGLINES)");
return false;
}
return true;
@@ -127,7 +156,12 @@ PHP_METHOD(DOM_XMLDocument, createEmpty)
lxml_doc->encoding = xmlStrdup((const xmlChar *) encoding);
- dom_object *intern = php_dom_instantiate_object_helper(return_value, dom_xml_document_class_entry, (xmlNodePtr) lxml_doc, NULL);
+ dom_object *intern = php_dom_instantiate_object_helper(
+ return_value,
+ dom_xml_document_class_entry,
+ (xmlNodePtr) lxml_doc,
+ NULL
+ );
intern->document->is_modern_api_class = true;
return;
@@ -141,7 +175,15 @@ static void load_from_helper(INTERNAL_FUNCTION_PARAMETERS, int mode)
const char *source, *override_encoding = NULL;
size_t source_len, override_encoding_len;
zend_long options = 0;
- if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|lp!", &source, &source_len, &options, &override_encoding, &override_encoding_len) == FAILURE) {
+ if (zend_parse_parameters(
+ ZEND_NUM_ARGS(),
+ "s|lp!",
+ &source,
+ &source_len,
+ &options,
+ &override_encoding,
+ &override_encoding_len
+ ) == FAILURE) {
RETURN_THROWS();
}
@@ -186,7 +228,12 @@ static void load_from_helper(INTERNAL_FUNCTION_PARAMETERS, int mode)
}
RETURN_THROWS();
}
- dom_object *intern = php_dom_instantiate_object_helper(return_value, dom_xml_document_class_entry, (xmlNodePtr) lxml_doc, NULL);
+ dom_object *intern = php_dom_instantiate_object_helper(
+ return_value,
+ dom_xml_document_class_entry,
+ (xmlNodePtr) lxml_doc,
+ NULL
+ );
intern->document->is_modern_api_class = true;
dom_mark_namespaces_as_attributes_too(lxml_doc);
}
From 308313c4acfe6f6ce1eea9f5e8ebd314ddef9147 Mon Sep 17 00:00:00 2001
From: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
Date: Sat, 11 Nov 2023 18:31:54 +0100
Subject: [PATCH 48/53] Make encoding explicit in XMLDocument
---
.../serializer/HTMLDocument_serialize_ns_imported_01.phpt | 2 +-
.../serializer/HTMLDocument_serialize_ns_imported_02.phpt | 2 +-
.../serializer/HTMLDocument_serialize_ns_imported_03.phpt | 2 +-
.../serializer/HTMLDocument_serialize_ns_imported_04.phpt | 2 +-
.../serializer/HTMLDocument_serialize_ns_imported_05.phpt | 2 +-
.../serializer/HTMLDocument_serialize_ns_imported_06.phpt | 2 +-
.../xml/XMLDocument_createFromFile_override_encoding.phpt | 4 ++--
.../xml/XMLDocument_createFromString_override_encoding.phpt | 4 ++--
ext/dom/tests/modern/xml/XMLDocument_fromFile_04.phpt | 2 +-
ext/dom/tests/modern/xml/XMLDocument_fromString_02.phpt | 4 ++--
.../modern/xml/XMLDocument_node_ownerDocument_for_XML.phpt | 4 ++--
ext/dom/xml_document.c | 5 +++++
12 files changed, 20 insertions(+), 15 deletions(-)
diff --git a/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_ns_imported_01.phpt b/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_ns_imported_01.phpt
index d76529945c2a7..1395fe72f1e97 100644
--- a/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_ns_imported_01.phpt
+++ b/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_ns_imported_01.phpt
@@ -22,7 +22,7 @@ echo $html->saveHTML(), "\n";
?>
--EXPECT--
-<?xml version="1.0"?>
+<?xml version="1.0" encoding="UTF-8"?>
--- After import into HTML ---
diff --git a/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_ns_imported_02.phpt b/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_ns_imported_02.phpt
index 6c3d01424dee1..c7297805b0a30 100644
--- a/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_ns_imported_02.phpt
+++ b/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_ns_imported_02.phpt
@@ -21,7 +21,7 @@ echo $html->saveHTML(), "\n";
?>
--EXPECT--
-<?xml version="1.0"?>
+<?xml version="1.0" encoding="UTF-8"?>
diff --git a/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_ns_imported_03.phpt b/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_ns_imported_03.phpt
index f6b88496765aa..f5a51c16b01e5 100644
--- a/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_ns_imported_03.phpt
+++ b/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_ns_imported_03.phpt
@@ -21,7 +21,7 @@ echo $html->saveHTML(), "\n";
?>
--EXPECT--
-<?xml version="1.0"?>
+<?xml version="1.0" encoding="UTF-8"?>
diff --git a/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_ns_imported_04.phpt b/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_ns_imported_04.phpt
index 7ecc1133f509e..9793043568c2d 100644
--- a/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_ns_imported_04.phpt
+++ b/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_ns_imported_04.phpt
@@ -21,7 +21,7 @@ echo $html->saveHTML(), "\n";
?>
--EXPECT--
-<?xml version="1.0"?>
+<?xml version="1.0" encoding="UTF-8"?>
diff --git a/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_ns_imported_05.phpt b/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_ns_imported_05.phpt
index 588819a9b6c2f..09810be1e13ed 100644
--- a/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_ns_imported_05.phpt
+++ b/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_ns_imported_05.phpt
@@ -21,7 +21,7 @@ echo $html->saveHTML(), "\n";
?>
--EXPECT--
-<?xml version="1.0"?>
+<?xml version="1.0" encoding="UTF-8"?>
diff --git a/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_ns_imported_06.phpt b/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_ns_imported_06.phpt
index 8de70bd82fc72..5df88add914ee 100644
--- a/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_ns_imported_06.phpt
+++ b/ext/dom/tests/modern/html/serializer/HTMLDocument_serialize_ns_imported_06.phpt
@@ -21,7 +21,7 @@ echo $html->saveHTML(), "\n";
?>
--EXPECT--
-<?xml version="1.0"?>
+<?xml version="1.0" encoding="UTF-8"?>
diff --git a/ext/dom/tests/modern/xml/XMLDocument_createFromFile_override_encoding.phpt b/ext/dom/tests/modern/xml/XMLDocument_createFromFile_override_encoding.phpt
index 6090d9b34b024..9ca93b56d5879 100644
--- a/ext/dom/tests/modern/xml/XMLDocument_createFromFile_override_encoding.phpt
+++ b/ext/dom/tests/modern/xml/XMLDocument_createFromFile_override_encoding.phpt
@@ -25,6 +25,6 @@ var_dump($dom->encoding);
--EXPECT--
DOM\XMLDocument::createFromFile(): Argument #3 ($overrideEncoding) must be a valid document encoding
string(2) "é"
-NULL
+string(5) "UTF-8"
string(4) "Ã©"
-NULL
+string(12) "Windows-1252"
diff --git a/ext/dom/tests/modern/xml/XMLDocument_createFromString_override_encoding.phpt b/ext/dom/tests/modern/xml/XMLDocument_createFromString_override_encoding.phpt
index bcbca9a4fdf48..29ce548292e9a 100644
--- a/ext/dom/tests/modern/xml/XMLDocument_createFromString_override_encoding.phpt
+++ b/ext/dom/tests/modern/xml/XMLDocument_createFromString_override_encoding.phpt
@@ -25,6 +25,6 @@ var_dump($dom->encoding);
--EXPECT--
DOM\XMLDocument::createFromString(): Argument #3 ($overrideEncoding) must be a valid document encoding
string(2) "é"
-NULL
+string(5) "UTF-8"
string(4) "Ã©"
-NULL
+string(12) "Windows-1252"
diff --git a/ext/dom/tests/modern/xml/XMLDocument_fromFile_04.phpt b/ext/dom/tests/modern/xml/XMLDocument_fromFile_04.phpt
index 5b6798286fdb3..fcf8709e954cb 100644
--- a/ext/dom/tests/modern/xml/XMLDocument_fromFile_04.phpt
+++ b/ext/dom/tests/modern/xml/XMLDocument_fromFile_04.phpt
@@ -10,7 +10,7 @@ echo $dom->saveXML();
?>
--EXPECT--
-<?xml version="1.0"?>
+<?xml version="1.0" encoding="UTF-8"?>
The Grapes of Wrath
diff --git a/ext/dom/tests/modern/xml/XMLDocument_fromString_02.phpt b/ext/dom/tests/modern/xml/XMLDocument_fromString_02.phpt
index a3a7ed4b549d2..3576af5f6a625 100644
--- a/ext/dom/tests/modern/xml/XMLDocument_fromString_02.phpt
+++ b/ext/dom/tests/modern/xml/XMLDocument_fromString_02.phpt
@@ -10,6 +10,6 @@ var_dump($dom->saveXMLFile("php://stdout"));
?>
--EXPECT--
-<?xml version="1.0"?>
+<?xml version="1.0" encoding="UTF-8"?>
-int(35)
+int(52)
diff --git a/ext/dom/tests/modern/xml/XMLDocument_node_ownerDocument_for_XML.phpt b/ext/dom/tests/modern/xml/XMLDocument_node_ownerDocument_for_XML.phpt
index d9d5f9c052de9..eacdbedd67ef6 100644
--- a/ext/dom/tests/modern/xml/XMLDocument_node_ownerDocument_for_XML.phpt
+++ b/ext/dom/tests/modern/xml/XMLDocument_node_ownerDocument_for_XML.phpt
@@ -15,9 +15,9 @@ var_dump($element->ownerDocument);
--EXPECTF--
object(DOM\XMLDocument)#1 (37) {
["encoding"]=>
- NULL
+ string(5) "UTF-8"
["xmlEncoding"]=>
- NULL
+ string(5) "UTF-8"
["standalone"]=>
bool(false)
["xmlStandalone"]=>
diff --git a/ext/dom/xml_document.c b/ext/dom/xml_document.c
index bd973ece28309..4f71ba499a241 100644
--- a/ext/dom/xml_document.c
+++ b/ext/dom/xml_document.c
@@ -228,6 +228,11 @@ static void load_from_helper(INTERNAL_FUNCTION_PARAMETERS, int mode)
}
RETURN_THROWS();
}
+ if (override_encoding) {
+ lxml_doc->encoding = xmlStrdup((const xmlChar *) override_encoding);
+ } else {
+ lxml_doc->encoding = xmlStrdup((const xmlChar *) "UTF-8");
+ }
dom_object *intern = php_dom_instantiate_object_helper(
return_value,
dom_xml_document_class_entry,
From b7f065f9236c9f2abe1c2afb2c75a45edbb1338f Mon Sep 17 00:00:00 2001
From: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
Date: Sat, 11 Nov 2023 18:34:14 +0100
Subject: [PATCH 49/53] Mark XMLDocument::validate() no longer as
tentative-return-type
---
ext/dom/php_dom.stub.php | 1 -
ext/dom/php_dom_arginfo.h | 5 +++--
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/ext/dom/php_dom.stub.php b/ext/dom/php_dom.stub.php
index 5547a56df9656..cc355612474c7 100644
--- a/ext/dom/php_dom.stub.php
+++ b/ext/dom/php_dom.stub.php
@@ -1197,7 +1197,6 @@ public static function createFromString(string $source, int $options = 0, ?strin
public function createEntityReference(string $name) {}
/**
- * @tentative-return-type
* @implementation-alias DOMDocument::validate
*/
public function validate(): bool {}
diff --git a/ext/dom/php_dom_arginfo.h b/ext/dom/php_dom_arginfo.h
index f3144871d928b..eb5e55c2cf280 100644
--- a/ext/dom/php_dom_arginfo.h
+++ b/ext/dom/php_dom_arginfo.h
@@ -1,5 +1,5 @@
/* This is a generated file, edit the .stub.php file instead.
- * Stub hash: c550911c409dac315afdbfc5858b0d8b0cba2bf3 */
+ * Stub hash: fbdcec5b706fe7dc0e757edfa262d96b44bd510f */
ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(arginfo_dom_import_simplexml, 0, 1, DOMElement, 0)
ZEND_ARG_TYPE_INFO(0, node, IS_OBJECT, 0)
@@ -607,7 +607,8 @@ ZEND_END_ARG_INFO()
#define arginfo_class_DOM_XMLDocument_createEntityReference arginfo_class_DOMDocument_createEntityReference
-#define arginfo_class_DOM_XMLDocument_validate arginfo_class_DOMNode_hasAttributes
+ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_class_DOM_XMLDocument_validate, 0, 0, _IS_BOOL, 0)
+ZEND_END_ARG_INFO()
#define arginfo_class_DOM_XMLDocument_xinclude arginfo_class_DOMDocument_xinclude
From dbc9afa477ddd40c27f4708760fa64ae924356eb Mon Sep 17 00:00:00 2001
From: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
Date: Sat, 11 Nov 2023 21:27:40 +0100
Subject: [PATCH 50/53] Only explicitly set encoding if not provided by the XML
parser
---
.../modern/xml/XMLDocument_fromString_04.phpt | 16 ++++++++++++++++
ext/dom/xml_document.c | 10 ++++++----
2 files changed, 22 insertions(+), 4 deletions(-)
create mode 100644 ext/dom/tests/modern/xml/XMLDocument_fromString_04.phpt
diff --git a/ext/dom/tests/modern/xml/XMLDocument_fromString_04.phpt b/ext/dom/tests/modern/xml/XMLDocument_fromString_04.phpt
new file mode 100644
index 0000000000000..f3a7cd6690275
--- /dev/null
+++ b/ext/dom/tests/modern/xml/XMLDocument_fromString_04.phpt
@@ -0,0 +1,16 @@
+--TEST--
+DOM\XMLDocument::createFromString 04
+--EXTENSIONS--
+dom
+--FILE--
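+<?php
+
+$dom = DOM\XMLDocument::createFromString('<?xml version="1.0" encoding="Windows-1251"?><container/>');
+var_dump($dom->encoding);
+echo $dom->saveXML();
+
+?>
+--EXPECT--
+string(12) "Windows-1251"
+<?xml version="1.0" encoding="Windows-1251"?>
+<container/>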
diff --git a/ext/dom/xml_document.c b/ext/dom/xml_document.c
index 4f71ba499a241..0d12d66661b18 100644
--- a/ext/dom/xml_document.c
+++ b/ext/dom/xml_document.c
@@ -228,10 +228,12 @@ static void load_from_helper(INTERNAL_FUNCTION_PARAMETERS, int mode)
}
RETURN_THROWS();
}
- if (override_encoding) {
- lxml_doc->encoding = xmlStrdup((const xmlChar *) override_encoding);
- } else {
- lxml_doc->encoding = xmlStrdup((const xmlChar *) "UTF-8");
+ if (lxml_doc->encoding == NULL) {
+ if (override_encoding) {
+ lxml_doc->encoding = xmlStrdup((const xmlChar *) override_encoding);
+ } else {
+ lxml_doc->encoding = xmlStrdup((const xmlChar *) "UTF-8");
+ }
}
dom_object *intern = php_dom_instantiate_object_helper(
return_value,
From c7b55c2194e1b3621a64fad1d6647be66e7d8e8e Mon Sep 17 00:00:00 2001
From: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
Date: Sun, 12 Nov 2023 20:18:04 +0100
Subject: [PATCH 51/53] Code review fixes
---
ext/dom/document.c | 2 +-
ext/dom/php_dom.h | 8 +++++---
ext/dom/xml_document.c | 2 +-
3 files changed, 7 insertions(+), 5 deletions(-)
diff --git a/ext/dom/document.c b/ext/dom/document.c
index ae38ded819bb8..c4ec263db7fd8 100644
--- a/ext/dom/document.c
+++ b/ext/dom/document.c
@@ -1197,7 +1197,7 @@ const char *_dom_get_valid_file_path(const char *source, char *resolved_path, in
}
/* }}} */
-xmlDocPtr dom_document_parser(zval *id, int mode, const char *source, size_t source_len, size_t options, xmlCharEncodingHandlerPtr encoding) /* {{{ */
+xmlDocPtr dom_document_parser(zval *id, dom_load_mode mode, const char *source, size_t source_len, size_t options, xmlCharEncodingHandlerPtr encoding) /* {{{ */
{
xmlDocPtr ret;
xmlParserCtxtPtr ctxt = NULL;
diff --git a/ext/dom/php_dom.h b/ext/dom/php_dom.h
index 29b6927dc0cfb..dc28d21061103 100644
--- a/ext/dom/php_dom.h
+++ b/ext/dom/php_dom.h
@@ -160,10 +160,12 @@ void php_dom_document_constructor(INTERNAL_FUNCTION_PARAMETERS);
dom_object *php_dom_instantiate_object_helper(zval *return_value, zend_class_entry *ce, xmlNodePtr obj, dom_object *parent);
-#define DOM_LOAD_STRING 0
-#define DOM_LOAD_FILE 1
+typedef enum {
+ DOM_LOAD_STRING = 0,
+ DOM_LOAD_FILE = 1,
+} dom_load_mode;
-xmlDocPtr dom_document_parser(zval *id, int mode, const char *source, size_t source_len, size_t options, xmlCharEncodingHandlerPtr encoding);
+xmlDocPtr dom_document_parser(zval *id, dom_load_mode mode, const char *source, size_t source_len, size_t options, xmlCharEncodingHandlerPtr encoding);
/* parentnode */
void dom_parent_node_prepend(dom_object *context, zval *nodes, uint32_t nodesc);
diff --git a/ext/dom/xml_document.c b/ext/dom/xml_document.c
index 0d12d66661b18..c45e2ccfda014 100644
--- a/ext/dom/xml_document.c
+++ b/ext/dom/xml_document.c
@@ -193,7 +193,7 @@ static void load_from_helper(INTERNAL_FUNCTION_PARAMETERS, int mode)
}
if (ZEND_SIZE_T_INT_OVFL(source_len)) {
- zend_argument_value_error(1, "must not exceed INT_MAX in length");
+ zend_argument_value_error(1, "is too long");
RETURN_THROWS();
}
From 557aa43255d3c64480935c51333a1bdea5c8ce0c Mon Sep 17 00:00:00 2001
From: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
Date: Mon, 13 Nov 2023 19:24:49 +0100
Subject: [PATCH 52/53] Defensively set stream to NULL
---
ext/dom/html_document.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/ext/dom/html_document.c b/ext/dom/html_document.c
index cbca349fa3f36..8c5c8ab48bc95 100644
--- a/ext/dom/html_document.c
+++ b/ext/dom/html_document.c
@@ -1084,6 +1084,7 @@ PHP_METHOD(DOM_HTMLDocument, createFromFile)
}
php_stream_close(stream);
+ stream = NULL;
dom_object *intern = php_dom_instantiate_object_helper(
return_value,
From 46db0d2cc314b6200089b25d4ddcea9133754e29 Mon Sep 17 00:00:00 2001
From: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
Date: Mon, 13 Nov 2023 19:26:47 +0100
Subject: [PATCH 53/53] NEWS
---
NEWS | 1 +
1 file changed, 1 insertion(+)
diff --git a/NEWS b/NEWS
index 29896fbe6b33d..ab95aab660b07 100644
--- a/NEWS
+++ b/NEWS
@@ -10,6 +10,7 @@ DOM:
. Implement #53655 (Improve speed of DOMNode::C14N() on large XML documents).
(nielsdos)
. Fix cloning attribute with namespace disappearing namespace. (nielsdos)
+ . Implement DOM HTML5 parsing and serialization RFC. (nielsdos)
FTP:
. Removed the deprecated inet_ntoa call support. (David Carlier)