From a284b6a279ea66d42760058fe8a0aee32f947c08 Mon Sep 17 00:00:00 2001
From: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
Date: Sun, 12 Jan 2025 16:41:55 +0100
Subject: [PATCH 1/6] Factor out HTML document creation from stream to separate
function
---
ext/dom/html_document.c | 94 +++++++++++++++++++++++------------------
1 file changed, 52 insertions(+), 42 deletions(-)
diff --git a/ext/dom/html_document.c b/ext/dom/html_document.c
index 41624bfe172fd..546a8f3fc538c 100644
--- a/ext/dom/html_document.c
+++ b/ext/dom/html_document.c
@@ -994,34 +994,17 @@ PHP_METHOD(Dom_HTMLDocument, createFromString)
RETURN_THROWS();
}
-PHP_METHOD(Dom_HTMLDocument, createFromFile)
+static void dom_html_document_create_from_stream(
+ zval *return_value,
+ php_stream *stream,
+ zend_long options,
+ const char *override_encoding,
+ size_t override_encoding_len,
+ zend_string *opened_path,
+ const char *filename
+)
{
- const char *filename, *override_encoding = NULL;
php_dom_private_data *private_data = NULL;
- size_t filename_len, override_encoding_len;
- zend_long options = 0;
- php_stream *stream = NULL;
- if (zend_parse_parameters(
- ZEND_NUM_ARGS(),
- "p|lp!",
- &filename,
- &filename_len,
- &options,
- &override_encoding,
- &override_encoding_len
- ) == FAILURE) {
- RETURN_THROWS();
- }
-
- /* See php_libxml_streams_IO_open_wrapper(), apparently this caused issues in the past. */
- if (strstr(filename, "%00")) {
- zend_argument_value_error(1, "must not contain percent-encoded NUL bytes");
- RETURN_THROWS();
- }
-
- if (!check_options_validity(2, options)) {
- RETURN_THROWS();
- }
dom_lexbor_libxml2_bridge_application_data application_data;
application_data.input_name = filename;
@@ -1058,15 +1041,6 @@ PHP_METHOD(Dom_HTMLDocument, createFromFile)
dom_setup_parser_encoding_manually((const lxb_char_t *) buf, encoding_data, &decoding_encoding_ctx, &application_data);
}
- zend_string *opened_path = NULL;
- stream = php_stream_open_wrapper_ex(filename, "rb", REPORT_ERRORS, &opened_path, php_libxml_get_stream_context());
- if (!stream) {
- if (!EG(exception)) {
- zend_throw_exception_ex(NULL, 0, "Cannot open file '%s'", filename);
- }
- RETURN_THROWS();
- }
-
/* MIME sniff */
if (should_determine_encoding_implicitly) {
zend_string *charset = php_libxml_sniff_charset_from_stream(stream);
@@ -1192,12 +1166,6 @@ PHP_METHOD(Dom_HTMLDocument, createFromFile)
lxml_doc->URL = xmlStrdup((const xmlChar *) filename);
}
- if (opened_path != NULL) {
- zend_string_release_ex(opened_path, false);
- }
- php_stream_close(stream);
- stream = NULL;
-
dom_object *intern = php_dom_instantiate_object_helper(
return_value,
dom_html_document_class_entry,
@@ -1216,10 +1184,52 @@ PHP_METHOD(Dom_HTMLDocument, createFromFile)
php_dom_private_data_destroy(private_data);
}
lxb_html_document_destroy(document);
- php_stream_close(stream);
+}
+
+PHP_METHOD(Dom_HTMLDocument, createFromFile)
+{
+ const char *filename, *override_encoding = NULL;
+ size_t filename_len, override_encoding_len;
+ zend_long options = 0;
+ if (zend_parse_parameters(
+ ZEND_NUM_ARGS(),
+ "p|lp!",
+ &filename,
+ &filename_len,
+ &options,
+ &override_encoding,
+ &override_encoding_len
+ ) == FAILURE) {
+ RETURN_THROWS();
+ }
+
+ /* See php_libxml_streams_IO_open_wrapper(), apparently this caused issues in the past. */
+ if (strstr(filename, "%00")) {
+ zend_argument_value_error(1, "must not contain percent-encoded NUL bytes");
+ RETURN_THROWS();
+ }
+
+ if (!check_options_validity(2, options)) {
+ RETURN_THROWS();
+ }
+
+ zend_string *opened_path = NULL;
+ php_stream *stream = php_stream_open_wrapper_ex(filename, "rb", REPORT_ERRORS, &opened_path, php_libxml_get_stream_context());
+ if (!stream) {
+ if (!EG(exception)) {
+ zend_throw_exception_ex(NULL, 0, "Cannot open file '%s'", filename);
+ }
+ RETURN_THROWS();
+ }
+
+ dom_html_document_create_from_stream(
+ return_value, stream, options, override_encoding, override_encoding_len, opened_path, filename
+ );
+
if (opened_path != NULL) {
zend_string_release_ex(opened_path, false);
}
+ php_stream_close(stream);
}
static zend_result dom_write_output_smart_str(void *ctx, const char *buf, size_t size)
From d987b6d3ba2dea3193264a1c37bc151145f80df5 Mon Sep 17 00:00:00 2001
From: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
Date: Sun, 12 Jan 2025 17:31:20 +0100
Subject: [PATCH 2/6] Implement Dom\HTMLDocument::createFromStream()
---
ext/dom/html_document.c | 32 +++++++++++++-
ext/dom/php_dom.stub.php | 3 ++
ext/dom/php_dom_arginfo.h | 11 ++++-
.../parser/HTMLDocument_createFromStream.phpt | 27 ++++++++++++
.../HTMLDocument_createFromStream_broken.phpt | 44 +++++++++++++++++++
5 files changed, 115 insertions(+), 2 deletions(-)
create mode 100644 ext/dom/tests/modern/html/parser/HTMLDocument_createFromStream.phpt
create mode 100644 ext/dom/tests/modern/html/parser/HTMLDocument_createFromStream_broken.phpt
diff --git a/ext/dom/html_document.c b/ext/dom/html_document.c
index 546a8f3fc538c..53fbacd67e87f 100644
--- a/ext/dom/html_document.c
+++ b/ext/dom/html_document.c
@@ -1004,10 +1004,12 @@ static void dom_html_document_create_from_stream(
const char *filename
)
{
+ ZEND_ASSERT(stream != NULL);
+
php_dom_private_data *private_data = NULL;
dom_lexbor_libxml2_bridge_application_data application_data;
- application_data.input_name = filename;
+ application_data.input_name = filename ? filename : "Entity";
application_data.current_total_offset = 0;
application_data.html_no_implied = options & HTML_PARSE_NOIMPLIED;
dom_reset_line_column_cache(&application_data.cache_tokenizer);
@@ -1232,6 +1234,34 @@ PHP_METHOD(Dom_HTMLDocument, createFromFile)
php_stream_close(stream);
}
+PHP_METHOD(Dom_HTMLDocument, createFromStream)
+{
+ php_stream *stream;
+ zval *stream_zv;
+ const char *document_uri = NULL;
+ const char *override_encoding = NULL;
+ size_t override_encoding_len, document_uri_len;
+ zend_long options = 0;
+ if (zend_parse_parameters(
+ ZEND_NUM_ARGS(),
+ "r|p!lp!",
+ &stream_zv,
+ &document_uri,
+ &document_uri_len,
+ &options,
+ &override_encoding,
+ &override_encoding_len
+ ) == FAILURE) {
+ RETURN_THROWS();
+ }
+
+ php_stream_from_res(stream, Z_RES_P(stream_zv));
+
+ dom_html_document_create_from_stream(
+ return_value, stream, options, override_encoding, override_encoding_len, NULL, document_uri
+ );
+}
+
static zend_result dom_write_output_smart_str(void *ctx, const char *buf, size_t size)
{
smart_str_appendl((smart_str *) ctx, buf, size);
diff --git a/ext/dom/php_dom.stub.php b/ext/dom/php_dom.stub.php
index 43d26ec7a3c7d..00c693d3d4f81 100644
--- a/ext/dom/php_dom.stub.php
+++ b/ext/dom/php_dom.stub.php
@@ -2042,6 +2042,9 @@ public static function createEmpty(string $encoding = "UTF-8"): HTMLDocument {}
public static function createFromFile(string $path, int $options = 0, ?string $overrideEncoding = null): HTMLDocument {}
+ /** @param resource $stream */
+ public static function createFromStream($stream, ?string $documentURI = null, int $options = 0, ?string $overrideEncoding = null): HTMLDocument {}
+
public static function createFromString(string $source, int $options = 0, ?string $overrideEncoding = null): HTMLDocument {}
/** @implementation-alias Dom\XMLDocument::saveXml */
diff --git a/ext/dom/php_dom_arginfo.h b/ext/dom/php_dom_arginfo.h
index 5c21b909b0e18..a4cc3e762a603 100644
--- a/ext/dom/php_dom_arginfo.h
+++ b/ext/dom/php_dom_arginfo.h
@@ -1,5 +1,5 @@
/* This is a generated file, edit the .stub.php file instead.
- * Stub hash: 0fcee2fa666dc88faf084578dde157409a6f5594 */
+ * Stub hash: 8018206ec17368080a8f58e03f3e4be53cde2e34 */
ZEND_BEGIN_ARG_WITH_RETURN_OBJ_TYPE_MASK_EX(arginfo_dom_import_simplexml, 0, 1, DOMAttr|DOMElement, 0)
ZEND_ARG_TYPE_INFO(0, node, IS_OBJECT, 0)
@@ -1005,6 +1005,13 @@ ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(arginfo_class_Dom_HTMLDocument_createFrom
ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, overrideEncoding, IS_STRING, 1, "null")
ZEND_END_ARG_INFO()
+ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(arginfo_class_Dom_HTMLDocument_createFromStream, 0, 1, Dom\\HTMLDocument, 0)
+ ZEND_ARG_INFO(0, stream)
+ ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, documentURI, IS_STRING, 1, "null")
+ ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, options, IS_LONG, 0, "0")
+ ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, overrideEncoding, IS_STRING, 1, "null")
+ZEND_END_ARG_INFO()
+
ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(arginfo_class_Dom_HTMLDocument_createFromString, 0, 1, Dom\\HTMLDocument, 0)
ZEND_ARG_TYPE_INFO(0, source, IS_STRING, 0)
ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, options, IS_LONG, 0, "0")
@@ -1302,6 +1309,7 @@ ZEND_METHOD(Dom_Document, registerNodeClass);
ZEND_METHOD(Dom_Document, importLegacyNode);
ZEND_METHOD(Dom_HTMLDocument, createEmpty);
ZEND_METHOD(Dom_HTMLDocument, createFromFile);
+ZEND_METHOD(Dom_HTMLDocument, createFromStream);
ZEND_METHOD(Dom_HTMLDocument, createFromString);
ZEND_METHOD(Dom_XMLDocument, saveXml);
ZEND_METHOD(Dom_HTMLDocument, saveHtml);
@@ -1752,6 +1760,7 @@ static const zend_function_entry class_Dom_Document_methods[] = {
static const zend_function_entry class_Dom_HTMLDocument_methods[] = {
ZEND_ME(Dom_HTMLDocument, createEmpty, arginfo_class_Dom_HTMLDocument_createEmpty, ZEND_ACC_PUBLIC|ZEND_ACC_STATIC)
ZEND_ME(Dom_HTMLDocument, createFromFile, arginfo_class_Dom_HTMLDocument_createFromFile, ZEND_ACC_PUBLIC|ZEND_ACC_STATIC)
+ ZEND_ME(Dom_HTMLDocument, createFromStream, arginfo_class_Dom_HTMLDocument_createFromStream, ZEND_ACC_PUBLIC|ZEND_ACC_STATIC)
ZEND_ME(Dom_HTMLDocument, createFromString, arginfo_class_Dom_HTMLDocument_createFromString, ZEND_ACC_PUBLIC|ZEND_ACC_STATIC)
ZEND_RAW_FENTRY("saveXml", zim_Dom_XMLDocument_saveXml, arginfo_class_Dom_HTMLDocument_saveXml, ZEND_ACC_PUBLIC, NULL, NULL)
ZEND_RAW_FENTRY("saveXmlFile", zim_DOMDocument_save, arginfo_class_Dom_HTMLDocument_saveXmlFile, ZEND_ACC_PUBLIC, NULL, NULL)
diff --git a/ext/dom/tests/modern/html/parser/HTMLDocument_createFromStream.phpt b/ext/dom/tests/modern/html/parser/HTMLDocument_createFromStream.phpt
new file mode 100644
index 0000000000000..ef223cf4531a2
--- /dev/null
+++ b/ext/dom/tests/modern/html/parser/HTMLDocument_createFromStream.phpt
@@ -0,0 +1,27 @@
+--TEST--
+Dom\HTMLDocument::createFromStream() - from memory
+--EXTENSIONS--
+dom
+--FILE--
+
Hello world
");
+rewind($tmp);
+$dom1 = Dom\HTMLDocument::createFromStream($tmp);
+rewind($tmp);
+$dom2 = Dom\HTMLDocument::createFromStream($tmp, "http://example.com");
+fclose($tmp);
+
+var_dump($dom1->documentURI);
+var_dump($dom2->documentURI);
+
+echo $dom1->saveHtml(), "\n";
+echo $dom2->saveHtml(), "\n";
+
+?>
+--EXPECT--
+string(11) "about:blank"
+string(18) "http://example.com"
+Hello world
+Hello world
diff --git a/ext/dom/tests/modern/html/parser/HTMLDocument_createFromStream_broken.phpt b/ext/dom/tests/modern/html/parser/HTMLDocument_createFromStream_broken.phpt
new file mode 100644
index 0000000000000..c02d9cf1a8069
--- /dev/null
+++ b/ext/dom/tests/modern/html/parser/HTMLDocument_createFromStream_broken.phpt
@@ -0,0 +1,44 @@
+--TEST--
+Dom\HTMLDocument::createFromStream() - broken stream
+--EXTENSIONS--
+dom
+--FILE--
+first) {
+ $this->first = false;
+ return "Hello";
+ }
+ throw new Error("broken");
+ }
+
+ public function stream_open(string $path, string $mode, int $options, ?string &$opened_path) {
+ return true;
+ }
+
+ public function stream_close(): void {
+ }
+
+ public function stream_eof(): bool {
+ return !$this->first;
+ }
+}
+
+stream_wrapper_register("foo", MyStream::class);
+
+$tmp = fopen("foo://", "r+");
+try {
+ $dom = Dom\HTMLDocument::createFromStream($tmp);
+} catch (Error $e) {
+ echo $e->getMessage(), "\n";
+}
+fclose($tmp);
+
+?>
+--EXPECT--
+broken
From 584d38c1c87a2e6524fc44cc69503efdeb77a571 Mon Sep 17 00:00:00 2001
From: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
Date: Sun, 19 Jan 2025 14:58:39 +0100
Subject: [PATCH 3/6] Add XMLDocument::createFromStream()
---
ext/dom/document.c | 44 ++++---
ext/dom/php_dom.h | 15 ++-
ext/dom/php_dom.stub.php | 3 +
ext/dom/php_dom_arginfo.h | 11 +-
.../xml/XMLDocument_createFromStream.phpt | 29 +++++
.../XMLDocument_createFromStream_broken.phpt | 44 +++++++
ext/dom/xml_document.c | 119 ++++++++++++------
7 files changed, 206 insertions(+), 59 deletions(-)
create mode 100644 ext/dom/tests/modern/xml/XMLDocument_createFromStream.phpt
create mode 100644 ext/dom/tests/modern/xml/XMLDocument_createFromStream_broken.phpt
diff --git a/ext/dom/document.c b/ext/dom/document.c
index 378f076c56068..aad6e202fdf9d 100644
--- a/ext/dom/document.c
+++ b/ext/dom/document.c
@@ -1345,7 +1345,17 @@ const char *dom_get_valid_file_path(const char *source, char *resolved_path, int
}
/* }}} */
-xmlDocPtr dom_document_parser(zval *id, dom_load_mode mode, const char *source, size_t source_len, size_t options, xmlCharEncodingHandlerPtr encoding) /* {{{ */
+static int dom_stream_read(void *context, char *buffer, int len)
+{
+ zend_resource *resource = context;
+ if (EXPECTED(resource->ptr)) {
+ php_stream *stream = resource->ptr;
+ return php_stream_read(stream, buffer, len);
+ }
+ return -1;
+}
+
+xmlDocPtr dom_document_parser(zval *id, dom_load_mode mode, dom_source_union source, size_t options, xmlCharEncodingHandlerPtr encoding, const char *override_document_uri) /* {{{ */
{
xmlDocPtr ret;
xmlParserCtxtPtr ctxt = NULL;
@@ -1371,16 +1381,18 @@ xmlDocPtr dom_document_parser(zval *id, dom_load_mode mode, const char *source,
xmlInitParser();
if (mode == DOM_LOAD_FILE) {
- if (CHECK_NULL_PATH(source, source_len)) {
+ if (CHECK_NULL_PATH(source.str, source.str_len)) {
zend_argument_value_error(1, "must not contain any null bytes");
return NULL;
}
- const char *file_dest = dom_get_valid_file_path(source, resolved_path, MAXPATHLEN);
+ const char *file_dest = dom_get_valid_file_path(source.str, resolved_path, MAXPATHLEN);
if (file_dest) {
ctxt = xmlCreateFileParserCtxt(file_dest);
}
+ } else if (mode == DOM_LOAD_STRING) {
+ ctxt = xmlCreateMemoryParserCtxt(source.str, source.str_len);
} else {
- ctxt = xmlCreateMemoryParserCtxt(source, source_len);
+ ctxt = xmlCreateIOParserCtxt(NULL, NULL, dom_stream_read, NULL, source.stream->res, XML_CHAR_ENCODING_NONE);
}
if (ctxt == NULL) {
@@ -1393,7 +1405,7 @@ xmlDocPtr dom_document_parser(zval *id, dom_load_mode mode, const char *source,
}
/* If loading from memory, we need to set the base directory for the document */
- if (mode != DOM_LOAD_FILE) {
+ if (mode == DOM_LOAD_STRING) {
#ifdef HAVE_GETCWD
directory = VCWD_GETCWD(resolved_path, MAXPATHLEN);
#elif defined(HAVE_GETWD)
@@ -1410,6 +1422,11 @@ xmlDocPtr dom_document_parser(zval *id, dom_load_mode mode, const char *source,
}
ctxt->directory = (char *) xmlCanonicPath((const xmlChar *) resolved_path);
}
+ } else if (override_document_uri) {
+ if(ctxt->directory != NULL) {
+ xmlFree(ctxt->directory);
+ }
+ ctxt->directory = (char *) xmlCanonicPath((const xmlChar *) override_document_uri);
}
ctxt->vctxt.error = php_libxml_ctx_error;
@@ -1507,21 +1524,20 @@ static void php_dom_finish_loading_document(zval *this, zval *return_value, xmlD
RETURN_TRUE;
}
-static void dom_parse_document(INTERNAL_FUNCTION_PARAMETERS, int mode)
+static void dom_legacy_parse_document(INTERNAL_FUNCTION_PARAMETERS, int mode)
{
- char *source;
- size_t source_len;
+ dom_source_union source;
zend_long options = 0;
- if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|l", &source, &source_len, &options) == FAILURE) {
+ if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|l", &source.str, &source.str_len, &options) == FAILURE) {
RETURN_THROWS();
}
- if (!source_len) {
+ if (!source.str_len) {
zend_argument_must_not_be_empty_error(1);
RETURN_THROWS();
}
- if (ZEND_SIZE_T_INT_OVFL(source_len)) {
+ if (ZEND_SIZE_T_INT_OVFL(source.str_len)) {
php_error_docref(NULL, E_WARNING, "Input string is too long");
RETURN_FALSE;
}
@@ -1530,7 +1546,7 @@ static void dom_parse_document(INTERNAL_FUNCTION_PARAMETERS, int mode)
RETURN_FALSE;
}
- xmlDocPtr newdoc = dom_document_parser(ZEND_THIS, mode, source, source_len, options, NULL);
+ xmlDocPtr newdoc = dom_document_parser(ZEND_THIS, mode, source, options, NULL, NULL);
if (newdoc == DOM_DOCUMENT_MALFORMED) {
newdoc = NULL;
}
@@ -1542,7 +1558,7 @@ Since: DOM Level 3
*/
PHP_METHOD(DOMDocument, load)
{
- dom_parse_document(INTERNAL_FUNCTION_PARAM_PASSTHRU, DOM_LOAD_FILE);
+ dom_legacy_parse_document(INTERNAL_FUNCTION_PARAM_PASSTHRU, DOM_LOAD_FILE);
}
/* }}} end dom_document_load */
@@ -1551,7 +1567,7 @@ Since: DOM Level 3
*/
PHP_METHOD(DOMDocument, loadXML)
{
- dom_parse_document(INTERNAL_FUNCTION_PARAM_PASSTHRU, DOM_LOAD_STRING);
+ dom_legacy_parse_document(INTERNAL_FUNCTION_PARAM_PASSTHRU, DOM_LOAD_STRING);
}
/* }}} end dom_document_loadxml */
diff --git a/ext/dom/php_dom.h b/ext/dom/php_dom.h
index 1c21d8a64e1a9..a7ea77becb9a8 100644
--- a/ext/dom/php_dom.h
+++ b/ext/dom/php_dom.h
@@ -179,13 +179,22 @@ void dom_set_document_ref_pointers(xmlNodePtr node, php_libxml_ref_obj *document
void dom_set_document_ref_pointers_attr(xmlAttrPtr attr, php_libxml_ref_obj *document);
typedef enum {
- DOM_LOAD_STRING = 0,
- DOM_LOAD_FILE = 1,
+ DOM_LOAD_STRING,
+ DOM_LOAD_FILE,
+ DOM_LOAD_STREAM,
} dom_load_mode;
+typedef union {
+ struct {
+ const char *str;
+ size_t str_len;
+ };
+ php_stream *stream;
+} dom_source_union;
+
#define DOM_DOCUMENT_MALFORMED ((xmlDocPtr) -1)
-xmlDocPtr dom_document_parser(zval *id, dom_load_mode mode, const char *source, size_t source_len, size_t options, xmlCharEncodingHandlerPtr encoding);
+xmlDocPtr dom_document_parser(zval *id, dom_load_mode mode, dom_source_union source, size_t options, xmlCharEncodingHandlerPtr encoding, const char *override_document_uri);
/* parentnode */
void dom_parent_node_prepend(dom_object *context, zval *nodes, uint32_t nodesc);
diff --git a/ext/dom/php_dom.stub.php b/ext/dom/php_dom.stub.php
index 00c693d3d4f81..31c4ba26a6d2f 100644
--- a/ext/dom/php_dom.stub.php
+++ b/ext/dom/php_dom.stub.php
@@ -2068,6 +2068,9 @@ public static function createEmpty(string $version = "1.0", string $encoding = "
public static function createFromFile(string $path, int $options = 0, ?string $overrideEncoding = null): XMLDocument {}
+ /** @param resource $stream */
+ public static function createFromStream($stream, ?string $documentURI = null, int $options = 0, ?string $overrideEncoding = null): XMLDocument {}
+
public static function createFromString(string $source, int $options = 0, ?string $overrideEncoding = null): XMLDocument {}
/**
diff --git a/ext/dom/php_dom_arginfo.h b/ext/dom/php_dom_arginfo.h
index a4cc3e762a603..d0038885d7d46 100644
--- a/ext/dom/php_dom_arginfo.h
+++ b/ext/dom/php_dom_arginfo.h
@@ -1,5 +1,5 @@
/* This is a generated file, edit the .stub.php file instead.
- * Stub hash: 8018206ec17368080a8f58e03f3e4be53cde2e34 */
+ * Stub hash: 0a3830de3cf55ef30f22758db6825894f31cc819 */
ZEND_BEGIN_ARG_WITH_RETURN_OBJ_TYPE_MASK_EX(arginfo_dom_import_simplexml, 0, 1, DOMAttr|DOMElement, 0)
ZEND_ARG_TYPE_INFO(0, node, IS_OBJECT, 0)
@@ -1052,6 +1052,13 @@ ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(arginfo_class_Dom_XMLDocument_createFromF
ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, overrideEncoding, IS_STRING, 1, "null")
ZEND_END_ARG_INFO()
+ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(arginfo_class_Dom_XMLDocument_createFromStream, 0, 1, Dom\\XMLDocument, 0)
+ ZEND_ARG_INFO(0, stream)
+ ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, documentURI, IS_STRING, 1, "null")
+ ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, options, IS_LONG, 0, "0")
+ ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, overrideEncoding, IS_STRING, 1, "null")
+ZEND_END_ARG_INFO()
+
ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(arginfo_class_Dom_XMLDocument_createFromString, 0, 1, Dom\\XMLDocument, 0)
ZEND_ARG_TYPE_INFO(0, source, IS_STRING, 0)
ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, options, IS_LONG, 0, "0")
@@ -1319,6 +1326,7 @@ ZEND_METHOD(Dom_HTMLDocument, debugGetTemplateCount);
#endif
ZEND_METHOD(Dom_XMLDocument, createEmpty);
ZEND_METHOD(Dom_XMLDocument, createFromFile);
+ZEND_METHOD(Dom_XMLDocument, createFromStream);
ZEND_METHOD(Dom_XMLDocument, createFromString);
ZEND_METHOD(Dom_XMLDocument, xinclude);
ZEND_METHOD(Dom_TokenList, item);
@@ -1775,6 +1783,7 @@ static const zend_function_entry class_Dom_HTMLDocument_methods[] = {
static const zend_function_entry class_Dom_XMLDocument_methods[] = {
ZEND_ME(Dom_XMLDocument, createEmpty, arginfo_class_Dom_XMLDocument_createEmpty, ZEND_ACC_PUBLIC|ZEND_ACC_STATIC)
ZEND_ME(Dom_XMLDocument, createFromFile, arginfo_class_Dom_XMLDocument_createFromFile, ZEND_ACC_PUBLIC|ZEND_ACC_STATIC)
+ ZEND_ME(Dom_XMLDocument, createFromStream, arginfo_class_Dom_XMLDocument_createFromStream, ZEND_ACC_PUBLIC|ZEND_ACC_STATIC)
ZEND_ME(Dom_XMLDocument, createFromString, arginfo_class_Dom_XMLDocument_createFromString, ZEND_ACC_PUBLIC|ZEND_ACC_STATIC)
ZEND_RAW_FENTRY("createEntityReference", zim_DOMDocument_createEntityReference, arginfo_class_Dom_XMLDocument_createEntityReference, ZEND_ACC_PUBLIC, NULL, NULL)
ZEND_RAW_FENTRY("validate", zim_DOMDocument_validate, arginfo_class_Dom_XMLDocument_validate, ZEND_ACC_PUBLIC, NULL, NULL)
diff --git a/ext/dom/tests/modern/xml/XMLDocument_createFromStream.phpt b/ext/dom/tests/modern/xml/XMLDocument_createFromStream.phpt
new file mode 100644
index 0000000000000..55b5a4248b223
--- /dev/null
+++ b/ext/dom/tests/modern/xml/XMLDocument_createFromStream.phpt
@@ -0,0 +1,29 @@
+--TEST--
+Dom\XMLDocument::createFromStream() - from memory
+--EXTENSIONS--
+dom
+--FILE--
+");
+rewind($tmp);
+$dom1 = Dom\XMLDocument::createFromStream($tmp);
+rewind($tmp);
+$dom2 = Dom\XMLDocument::createFromStream($tmp, "http://example.com");
+fclose($tmp);
+
+var_dump($dom1->documentURI);
+var_dump($dom2->documentURI);
+
+echo $dom1->saveXml(), "\n";
+echo $dom2->saveXml(), "\n";
+
+?>
+--EXPECT--
+string(11) "about:blank"
+string(18) "http://example.com"
+
+
+
+
diff --git a/ext/dom/tests/modern/xml/XMLDocument_createFromStream_broken.phpt b/ext/dom/tests/modern/xml/XMLDocument_createFromStream_broken.phpt
new file mode 100644
index 0000000000000..73871f04c7132
--- /dev/null
+++ b/ext/dom/tests/modern/xml/XMLDocument_createFromStream_broken.phpt
@@ -0,0 +1,44 @@
+--TEST--
+Dom\XMLDocument::createFromStream() - broken stream
+--EXTENSIONS--
+dom
+--FILE--
+first) {
+ $this->first = false;
+ return "";
+ }
+ throw new Error("broken");
+ }
+
+ public function stream_open(string $path, string $mode, int $options, ?string &$opened_path) {
+ return true;
+ }
+
+ public function stream_close(): void {
+ }
+
+ public function stream_eof(): bool {
+ return !$this->first;
+ }
+}
+
+stream_wrapper_register("foo", MyStream::class);
+
+$tmp = fopen("foo://", "r+");
+try {
+ $dom = Dom\XMLDocument::createFromStream($tmp);
+} catch (Error $e) {
+ echo $e->getMessage(), "\n";
+}
+fclose($tmp);
+
+?>
+--EXPECT--
+broken
diff --git a/ext/dom/xml_document.c b/ext/dom/xml_document.c
index 2bd3d908d7093..56503e82ddb51 100644
--- a/ext/dom/xml_document.c
+++ b/ext/dom/xml_document.c
@@ -131,43 +131,8 @@ PHP_METHOD(Dom_XMLDocument, createEmpty)
RETURN_THROWS();
}
-static void load_from_helper(INTERNAL_FUNCTION_PARAMETERS, int mode)
+static void load_from_helper(zval *return_value, int mode, dom_source_union source, size_t options, const char *override_encoding, const char *override_document_uri)
{
- const char *source, *override_encoding = NULL;
- size_t source_len, override_encoding_len;
- zend_long options = 0;
- if (zend_parse_parameters(
- ZEND_NUM_ARGS(),
- "s|lp!",
- &source,
- &source_len,
- &options,
- &override_encoding,
- &override_encoding_len
- ) == FAILURE) {
- RETURN_THROWS();
- }
-
- if (!source_len) {
- zend_argument_value_error(1, "must not be empty");
- RETURN_THROWS();
- }
-
- if (ZEND_SIZE_T_INT_OVFL(source_len)) {
- zend_argument_value_error(1, "is too long");
- RETURN_THROWS();
- }
-
- /* See php_libxml_streams_IO_open_wrapper(), apparently this caused issues in the past. */
- if (mode == DOM_LOAD_FILE && strstr(source, "%00")) {
- zend_argument_value_error(1, "must not contain percent-encoded NUL bytes");
- RETURN_THROWS();
- }
-
- if (!check_options_validity(2, options)) {
- RETURN_THROWS();
- }
-
xmlCharEncodingHandlerPtr encoding = NULL;
if (override_encoding != NULL) {
encoding = xmlFindCharEncodingHandler(override_encoding);
@@ -178,14 +143,14 @@ static void load_from_helper(INTERNAL_FUNCTION_PARAMETERS, int mode)
options |= XML_PARSE_IGNORE_ENC;
}
- xmlDocPtr lxml_doc = dom_document_parser(NULL, mode, source, source_len, options, encoding);
+ xmlDocPtr lxml_doc = dom_document_parser(NULL, mode, source, options, encoding, override_document_uri);
if (UNEXPECTED(lxml_doc == NULL || lxml_doc == DOM_DOCUMENT_MALFORMED)) {
if (!EG(exception)) {
if (lxml_doc == DOM_DOCUMENT_MALFORMED) {
php_dom_throw_error_with_message(SYNTAX_ERR, "XML fragment is not well-formed", true);
} else {
if (mode == DOM_LOAD_FILE) {
- zend_throw_exception_ex(NULL, 0, "Cannot open file '%s'", source);
+ zend_throw_exception_ex(NULL, 0, "Cannot open file '%s'", source.str);
} else {
php_dom_throw_error(INVALID_STATE_ERR, true);
}
@@ -200,7 +165,7 @@ static void load_from_helper(INTERNAL_FUNCTION_PARAMETERS, int mode)
lxml_doc->encoding = xmlStrdup((const xmlChar *) "UTF-8");
}
}
- if (mode == DOM_LOAD_FILE && lxml_doc->URL != NULL) {
+ if ((mode == DOM_LOAD_FILE || mode == DOM_LOAD_STREAM) && lxml_doc->URL != NULL) {
if (!php_is_stream_path((char *) lxml_doc->URL)) {
/* Check for "file:/" instead of "file://" because of libxml2 quirk */
if (strncmp((const char *) lxml_doc->URL, "file:/", sizeof("file:/") - 1) != 0) {
@@ -235,6 +200,49 @@ static void load_from_helper(INTERNAL_FUNCTION_PARAMETERS, int mode)
dom_document_convert_to_modern(intern->document, lxml_doc);
}
+static void load_from_string_or_file_helper(INTERNAL_FUNCTION_PARAMETERS, int mode)
+{
+ const char *source, *override_encoding = NULL;
+ size_t source_len, override_encoding_len;
+ zend_long options = 0;
+ if (zend_parse_parameters(
+ ZEND_NUM_ARGS(),
+ "s|lp!",
+ &source,
+ &source_len,
+ &options,
+ &override_encoding,
+ &override_encoding_len
+ ) == FAILURE) {
+ RETURN_THROWS();
+ }
+
+ if (!source_len) {
+ zend_argument_value_error(1, "must not be empty");
+ RETURN_THROWS();
+ }
+
+ if (ZEND_SIZE_T_INT_OVFL(source_len)) {
+ zend_argument_value_error(1, "is too long");
+ RETURN_THROWS();
+ }
+
+ /* See php_libxml_streams_IO_open_wrapper(), apparently this caused issues in the past. */
+ if (mode == DOM_LOAD_FILE && strstr(source, "%00")) {
+ zend_argument_value_error(1, "must not contain percent-encoded NUL bytes");
+ RETURN_THROWS();
+ }
+
+ if (!check_options_validity(2, options)) {
+ RETURN_THROWS();
+ }
+
+ dom_source_union source_union;
+ source_union.str = source;
+ source_union.str_len = source_len;
+ load_from_helper(return_value, mode, source_union, options, override_encoding, NULL);
+}
+
void dom_document_convert_to_modern(php_libxml_ref_obj *document, xmlDocPtr lxml_doc)
{
php_dom_private_data *private_data = php_dom_private_data_create();
@@ -245,12 +253,41 @@ void dom_document_convert_to_modern(php_libxml_ref_obj *document, xmlDocPtr lxml
PHP_METHOD(Dom_XMLDocument, createFromString)
{
- load_from_helper(INTERNAL_FUNCTION_PARAM_PASSTHRU, DOM_LOAD_STRING);
+ load_from_string_or_file_helper(INTERNAL_FUNCTION_PARAM_PASSTHRU, DOM_LOAD_STRING);
}
PHP_METHOD(Dom_XMLDocument, createFromFile)
{
- load_from_helper(INTERNAL_FUNCTION_PARAM_PASSTHRU, DOM_LOAD_FILE);
+ load_from_string_or_file_helper(INTERNAL_FUNCTION_PARAM_PASSTHRU, DOM_LOAD_FILE);
+}
+
+PHP_METHOD(Dom_XMLDocument, createFromStream)
+{
+ zval *stream_zv;
+ dom_source_union source_union;
+ const char *document_uri = NULL, *override_encoding = NULL;
+ size_t document_uri_len = 0, override_encoding_len = 0;
+ zend_long options = 0;
+ if (zend_parse_parameters(
+ ZEND_NUM_ARGS(),
+ "r|p!lp!",
+ &stream_zv,
+ &document_uri,
+ &document_uri_len,
+ &options,
+ &override_encoding,
+ &override_encoding_len
+ ) == FAILURE) {
+ RETURN_THROWS();
+ }
+
+ php_stream_from_res(source_union.stream, Z_RES_P(stream_zv));
+
+ if (!check_options_validity(3, options)) {
+ RETURN_THROWS();
+ }
+
+ load_from_helper(return_value, DOM_LOAD_STREAM, source_union, options, override_encoding, document_uri);
}
static int php_new_dom_write_smart_str(void *context, const char *buffer, int len)
From af26fcffc2403df28305a65ab02ec942fe810bbc Mon Sep 17 00:00:00 2001
From: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
Date: Sun, 19 Jan 2025 15:00:08 +0100
Subject: [PATCH 4/6] Move common encoding validity checking function to
ext-libxml
---
ext/libxml/libxml.c | 16 ++++++++++++++++
ext/libxml/php_libxml.h | 1 +
ext/xmlreader/php_xmlreader.c | 22 +++-------------------
3 files changed, 20 insertions(+), 19 deletions(-)
diff --git a/ext/libxml/libxml.c b/ext/libxml/libxml.c
index 94704d09e463e..508925f8c33d4 100644
--- a/ext/libxml/libxml.c
+++ b/ext/libxml/libxml.c
@@ -1075,6 +1075,22 @@ PHP_LIBXML_API bool php_libxml_uses_internal_errors(void)
return xmlStructuredError == php_libxml_structured_error_handler;
}
+PHP_LIBXML_API bool php_libxml_is_valid_encoding(const char *encoding)
+{
+ if (!encoding) {
+ return true;
+ }
+
+ /* Probe for an encoding handler up front instead of using xmlTextReaderConstEncoding() afterwards:
+ * libxml2 < 2.12.0 has a bug where the latter returns NULL for some valid encodings. */
+ xmlCharEncodingHandlerPtr handler = xmlFindCharEncodingHandler(encoding);
+ if (!handler) {
+ return false;
+ }
+ xmlCharEncCloseFunc(handler);
+ return true;
+}
+
/* {{{ Disable libxml errors and allow user to fetch error information as needed */
PHP_FUNCTION(libxml_use_internal_errors)
{
diff --git a/ext/libxml/php_libxml.h b/ext/libxml/php_libxml.h
index 3314bf6a7b28c..9062d62d3d5e8 100644
--- a/ext/libxml/php_libxml.h
+++ b/ext/libxml/php_libxml.h
@@ -214,6 +214,7 @@ PHP_LIBXML_API bool php_libxml_disable_entity_loader(bool disable);
PHP_LIBXML_API void php_libxml_set_old_ns(xmlDocPtr doc, xmlNsPtr ns);
PHP_LIBXML_API php_stream_context *php_libxml_get_stream_context(void);
PHP_LIBXML_API bool php_libxml_uses_internal_errors(void);
+PHP_LIBXML_API bool php_libxml_is_valid_encoding(const char *encoding);
PHP_LIBXML_API xmlChar *php_libxml_attr_value(const xmlAttr *attr, bool *free);
diff --git a/ext/xmlreader/php_xmlreader.c b/ext/xmlreader/php_xmlreader.c
index 992d60bd2c2ec..09423342de3b5 100644
--- a/ext/xmlreader/php_xmlreader.c
+++ b/ext/xmlreader/php_xmlreader.c
@@ -881,22 +881,6 @@ PHP_METHOD(XMLReader, next)
}
/* }}} */
-static bool xmlreader_valid_encoding(const char *encoding)
-{
- if (!encoding) {
- return true;
- }
-
- /* Normally we could use xmlTextReaderConstEncoding() afterwards but libxml2 < 2.12.0 has a bug of course
- * where it returns NULL for some valid encodings instead. */
- xmlCharEncodingHandlerPtr handler = xmlFindCharEncodingHandler(encoding);
- if (!handler) {
- return false;
- }
- xmlCharEncCloseFunc(handler);
- return true;
-}
-
/* {{{ Sets the URI that the XMLReader will parse. */
static void xml_reader_from_uri(INTERNAL_FUNCTION_PARAMETERS, zend_class_entry *instance_ce, bool use_exceptions)
{
@@ -925,7 +909,7 @@ static void xml_reader_from_uri(INTERNAL_FUNCTION_PARAMETERS, zend_class_entry *
RETURN_THROWS();
}
- if (!xmlreader_valid_encoding(encoding)) {
+ if (!php_libxml_is_valid_encoding(encoding)) {
zend_argument_value_error(2, "must be a valid character encoding");
RETURN_THROWS();
}
@@ -1013,7 +997,7 @@ PHP_METHOD(XMLReader, fromStream)
php_stream_from_res(stream, Z_RES_P(stream_zv));
- if (!xmlreader_valid_encoding(encoding_name)) {
+ if (!php_libxml_is_valid_encoding(encoding_name)) {
zend_argument_value_error(2, "must be a valid character encoding");
RETURN_THROWS();
}
@@ -1197,7 +1181,7 @@ static void xml_reader_from_string(INTERNAL_FUNCTION_PARAMETERS, zend_class_entr
RETURN_THROWS();
}
- if (!xmlreader_valid_encoding(encoding)) {
+ if (!php_libxml_is_valid_encoding(encoding)) {
zend_argument_value_error(2, "must be a valid character encoding");
RETURN_THROWS();
}
From 7f27a8115cb62ad3f39d0252846430331778ffe2 Mon Sep 17 00:00:00 2001
From: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
Date: Sun, 19 Jan 2025 15:00:24 +0100
Subject: [PATCH 5/6] Add simplexml_load_stream()
---
ext/simplexml/simplexml.c | 107 ++++++++++++------
ext/simplexml/simplexml.stub.php | 3 +
ext/simplexml/simplexml_arginfo.h | 14 ++-
.../tests/simplexml_load_stream_broken.phpt | 51 +++++++++
.../tests/simplexml_load_stream_errors.phpt | 18 +++
.../tests/simplexml_load_stream_memory.phpt | 35 ++++++
...exml_load_stream_memory_with_encoding.phpt | 27 +++++
7 files changed, 219 insertions(+), 36 deletions(-)
create mode 100644 ext/simplexml/tests/simplexml_load_stream_broken.phpt
create mode 100644 ext/simplexml/tests/simplexml_load_stream_errors.phpt
create mode 100644 ext/simplexml/tests/simplexml_load_stream_memory.phpt
create mode 100644 ext/simplexml/tests/simplexml_load_stream_memory_with_encoding.phpt
diff --git a/ext/simplexml/simplexml.c b/ext/simplexml/simplexml.c
index 3dcf7ca8fb049..33feb91d55058 100644
--- a/ext/simplexml/simplexml.c
+++ b/ext/simplexml/simplexml.c
@@ -2183,17 +2183,37 @@ sxe_object_new(zend_class_entry *ce)
}
/* }}} */
+static void sxe_create_obj_from_doc(zval *return_value, xmlDocPtr docp, zend_class_entry *ce, zend_string *ns, bool isprefix) /* Shared tail of simplexml_load_file/_stream/_string: wraps a parsed document in a SimpleXML object, or returns false when docp is NULL. */
+{
+ if (!docp) {
+ RETURN_FALSE;
+ }
+
+ zend_function *fptr_count;
+ if (!ce) { /* no class supplied: fall back to SimpleXMLElement, with no count() function pointer */
+ ce = ce_SimpleXMLElement;
+ fptr_count = NULL;
+ } else {
+ fptr_count = php_sxe_find_fptr_count(ce);
+ }
+ php_sxe_object *sxe = php_sxe_object_new(ce, fptr_count);
+ sxe->iter.nsprefix = ZSTR_LEN(ns) ? zend_string_copy(ns) : NULL; /* an empty namespace string is stored as NULL */
+ sxe->iter.isprefix = isprefix;
+ php_libxml_increment_doc_ref((php_libxml_node_object *)sxe, docp);
+ php_libxml_increment_node_ptr((php_libxml_node_object *)sxe, xmlDocGetRootElement(docp), NULL); /* the new object points at the document's root element */
+
+ RETURN_OBJ(&sxe->zo);
+}
+
/* {{{ Load a filename and return a simplexml_element object to allow for processing */
PHP_FUNCTION(simplexml_load_file)
{
- php_sxe_object *sxe;
char *filename;
size_t filename_len;
xmlDocPtr docp;
zend_string *ns = zend_empty_string;
zend_long options = 0;
zend_class_entry *ce= ce_SimpleXMLElement;
- zend_function *fptr_count;
bool isprefix = 0;
if (zend_parse_parameters(ZEND_NUM_ARGS(), "p|C!lSb", &filename, &filename_len, &ce, &options, &ns, &isprefix) == FAILURE) {
@@ -2209,37 +2229,70 @@ PHP_FUNCTION(simplexml_load_file)
docp = xmlReadFile(filename, NULL, (int)options);
PHP_LIBXML_RESTORE_GLOBALS(read_file);
- if (!docp) {
- RETURN_FALSE;
+ sxe_create_obj_from_doc(return_value, docp, ce, ns, isprefix);
+}
+/* }}} */
+
+static int sxe_stream_read(void *context, char *buffer, int len) /* Read callback passed to xmlReadIO() by simplexml_load_stream(); context is the stream's zend_resource. */
+{
+ zend_resource *resource = context;
+ if (EXPECTED(resource->ptr)) { /* NOTE(review): presumably ptr is NULL once the stream has been closed - confirm */
+ php_stream *stream = resource->ptr;
+ return php_stream_read(stream, buffer, len);
}
+ return -1; /* no stream behind the resource: report a read error to the parser */
+}
- if (!ce) {
- ce = ce_SimpleXMLElement;
- fptr_count = NULL;
- } else {
- fptr_count = php_sxe_find_fptr_count(ce);
+PHP_FUNCTION(simplexml_load_stream) /* Parse XML read from an open stream resource and return a SimpleXML object, or false when no document could be built. */
+{
+ zval *stream_zv;
+ php_stream *stream;
+ xmlDocPtr docp;
+ zend_string *ns = zend_empty_string;
+ zend_long options = 0;
+ zend_class_entry *ce = ce_SimpleXMLElement;
+ bool isprefix = 0;
+ const char *encoding = NULL;
+ const char *document_uri = NULL;
+ size_t encoding_len, document_uri_len;
+
+ if (zend_parse_parameters(ZEND_NUM_ARGS(), "r|p!p!C!lSb",
+ &stream_zv, &encoding, &encoding_len, &document_uri, &document_uri_len, &ce, &options, &ns, &isprefix) == FAILURE) {
+ RETURN_THROWS();
}
- sxe = php_sxe_object_new(ce, fptr_count);
- sxe->iter.nsprefix = ZSTR_LEN(ns) ? zend_string_copy(ns) : NULL;
- sxe->iter.isprefix = isprefix;
- php_libxml_increment_doc_ref((php_libxml_node_object *)sxe, docp);
- php_libxml_increment_node_ptr((php_libxml_node_object *)sxe, xmlDocGetRootElement(docp), NULL);
- RETURN_OBJ(&sxe->zo);
+ php_stream_from_res(stream, Z_RES_P(stream_zv));
+
+ if (!php_libxml_is_valid_encoding(encoding)) {
+ zend_argument_value_error(2, "must be a valid character encoding");
+ RETURN_THROWS();
+ }
+
+ if (ZEND_LONG_EXCEEDS_INT(options)) { /* options is narrowed to int for xmlReadIO() below */
+ zend_argument_value_error(5, "is too large");
+ RETURN_THROWS();
+ }
+
+ if (encoding) { /* an explicit encoding argument overrides the encoding declared inside the document */
+ options |= XML_PARSE_IGNORE_ENC;
+ }
+
+ PHP_LIBXML_SANITIZE_GLOBALS(read_file);
+ docp = xmlReadIO(sxe_stream_read, NULL, stream->res, document_uri, encoding, (int) options); /* no close callback is registered; the zend_resource, not the php_stream, is the read context */
+ PHP_LIBXML_RESTORE_GLOBALS(read_file);
+
+ sxe_create_obj_from_doc(return_value, docp, ce, ns, isprefix);
}
-/* }}} */
/* {{{ Load a string and return a simplexml_element object to allow for processing */
PHP_FUNCTION(simplexml_load_string)
{
- php_sxe_object *sxe;
char *data;
size_t data_len;
xmlDocPtr docp;
zend_string *ns = zend_empty_string;
zend_long options = 0;
zend_class_entry *ce= ce_SimpleXMLElement;
- zend_function *fptr_count;
bool isprefix = 0;
if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|C!lSb", &data, &data_len, &ce, &options, &ns, &isprefix) == FAILURE) {
@@ -2263,23 +2316,7 @@ PHP_FUNCTION(simplexml_load_string)
docp = xmlReadMemory(data, (int)data_len, NULL, NULL, (int)options);
PHP_LIBXML_RESTORE_GLOBALS(read_memory);
- if (!docp) {
- RETURN_FALSE;
- }
-
- if (!ce) {
- ce = ce_SimpleXMLElement;
- fptr_count = NULL;
- } else {
- fptr_count = php_sxe_find_fptr_count(ce);
- }
- sxe = php_sxe_object_new(ce, fptr_count);
- sxe->iter.nsprefix = ZSTR_LEN(ns) ? zend_string_copy(ns) : NULL;
- sxe->iter.isprefix = isprefix;
- php_libxml_increment_doc_ref((php_libxml_node_object *)sxe, docp);
- php_libxml_increment_node_ptr((php_libxml_node_object *)sxe, xmlDocGetRootElement(docp), NULL);
-
- RETURN_OBJ(&sxe->zo);
+ sxe_create_obj_from_doc(return_value, docp, ce, ns, isprefix);
}
/* }}} */
diff --git a/ext/simplexml/simplexml.stub.php b/ext/simplexml/simplexml.stub.php
index 2053fec6fdd2e..7386b4fdc1c1b 100644
--- a/ext/simplexml/simplexml.stub.php
+++ b/ext/simplexml/simplexml.stub.php
@@ -4,6 +4,9 @@
function simplexml_load_file(string $filename, ?string $class_name = SimpleXMLElement::class, int $options = 0, string $namespace_or_prefix = "", bool $is_prefix = false): SimpleXMLElement|false {}
+/** @param resource $stream */
+function simplexml_load_stream($stream, ?string $encoding = null, ?string $document_uri = null, ?string $class_name = SimpleXMLElement::class, int $options = 0, string $namespace_or_prefix = "", bool $is_prefix = false): SimpleXMLElement|false {}
+
function simplexml_load_string(string $data, ?string $class_name = SimpleXMLElement::class, int $options = 0, string $namespace_or_prefix = "", bool $is_prefix = false): SimpleXMLElement|false {}
function simplexml_import_dom(object $node, ?string $class_name = SimpleXMLElement::class): ?SimpleXMLElement {}
diff --git a/ext/simplexml/simplexml_arginfo.h b/ext/simplexml/simplexml_arginfo.h
index 379307f953a10..5a13186b275af 100644
--- a/ext/simplexml/simplexml_arginfo.h
+++ b/ext/simplexml/simplexml_arginfo.h
@@ -1,5 +1,5 @@
/* This is a generated file, edit the .stub.php file instead.
- * Stub hash: 36eac2dee86bcc386c24e2cc14caa7bd3d709e82 */
+ * Stub hash: a238d5299e5c2d1cbf10c1270d294809af05d5eb */
ZEND_BEGIN_ARG_WITH_RETURN_OBJ_TYPE_MASK_EX(arginfo_simplexml_load_file, 0, 1, SimpleXMLElement, MAY_BE_FALSE)
ZEND_ARG_TYPE_INFO(0, filename, IS_STRING, 0)
@@ -9,6 +9,16 @@ ZEND_BEGIN_ARG_WITH_RETURN_OBJ_TYPE_MASK_EX(arginfo_simplexml_load_file, 0, 1, S
ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, is_prefix, _IS_BOOL, 0, "false")
ZEND_END_ARG_INFO()
+ZEND_BEGIN_ARG_WITH_RETURN_OBJ_TYPE_MASK_EX(arginfo_simplexml_load_stream, 0, 1, SimpleXMLElement, MAY_BE_FALSE)
+ ZEND_ARG_INFO(0, stream)
+ ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, encoding, IS_STRING, 1, "null")
+ ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, document_uri, IS_STRING, 1, "null")
+ ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, class_name, IS_STRING, 1, "SimpleXMLElement::class")
+ ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, options, IS_LONG, 0, "0")
+ ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, namespace_or_prefix, IS_STRING, 0, "\"\"")
+ ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, is_prefix, _IS_BOOL, 0, "false")
+ZEND_END_ARG_INFO()
+
ZEND_BEGIN_ARG_WITH_RETURN_OBJ_TYPE_MASK_EX(arginfo_simplexml_load_string, 0, 1, SimpleXMLElement, MAY_BE_FALSE)
ZEND_ARG_TYPE_INFO(0, data, IS_STRING, 0)
ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, class_name, IS_STRING, 1, "SimpleXMLElement::class")
@@ -101,6 +111,7 @@ ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_OBJ_INFO_EX(arginfo_class_SimpleXMLElement_
ZEND_END_ARG_INFO()
ZEND_FUNCTION(simplexml_load_file);
+ZEND_FUNCTION(simplexml_load_stream);
ZEND_FUNCTION(simplexml_load_string);
ZEND_FUNCTION(simplexml_import_dom);
ZEND_METHOD(SimpleXMLElement, xpath);
@@ -126,6 +137,7 @@ ZEND_METHOD(SimpleXMLElement, getChildren);
static const zend_function_entry ext_functions[] = {
ZEND_FE(simplexml_load_file, arginfo_simplexml_load_file)
+ ZEND_FE(simplexml_load_stream, arginfo_simplexml_load_stream)
ZEND_FE(simplexml_load_string, arginfo_simplexml_load_string)
ZEND_FE(simplexml_import_dom, arginfo_simplexml_import_dom)
ZEND_FE_END
diff --git a/ext/simplexml/tests/simplexml_load_stream_broken.phpt b/ext/simplexml/tests/simplexml_load_stream_broken.phpt
new file mode 100644
index 0000000000000..6d2098f63f585
--- /dev/null
+++ b/ext/simplexml/tests/simplexml_load_stream_broken.phpt
@@ -0,0 +1,51 @@
+--TEST--
+simplexml_load_stream() - from broken stream
+--EXTENSIONS--
+simplexml
+--FILE--
+first) {
+ $this->first = false;
+ return "";
+ }
+ return false;
+ }
+
+ public function stream_open(string $path, string $mode, int $options, ?string &$opened_path) {
+ return true;
+ }
+
+ public function stream_close(): void {
+ }
+
+ public function stream_eof(): bool {
+ return !$this->first;
+ }
+}
+
+stream_wrapper_register("foo", MyStream::class);
+
+$tmp = fopen("foo://", "r");
+$sxe = simplexml_load_stream($tmp);
+fclose($tmp);
+
+var_dump($sxe);
+
+?>
+--EXPECTF--
+int(8192)
+int(8192)
+%A
+Warning: simplexml_load_stream(): Entity: line 1: parser error : %s
+
+Warning: simplexml_load_stream(): in %s on line %d
+
+Warning: simplexml_load_stream():%s^ in %s on line %d
+bool(false)
diff --git a/ext/simplexml/tests/simplexml_load_stream_errors.phpt b/ext/simplexml/tests/simplexml_load_stream_errors.phpt
new file mode 100644
index 0000000000000..b863dfafe0744
--- /dev/null
+++ b/ext/simplexml/tests/simplexml_load_stream_errors.phpt
@@ -0,0 +1,18 @@
+--TEST--
+simplexml_load_stream() - errors
+--EXTENSIONS--
+simplexml
+--FILE--
+getMessage(), "\n";
+}
+fclose($tmp);
+
+?>
+--EXPECT--
+simplexml_load_stream(): Argument #2 ($encoding) must be a valid character encoding
diff --git a/ext/simplexml/tests/simplexml_load_stream_memory.phpt b/ext/simplexml/tests/simplexml_load_stream_memory.phpt
new file mode 100644
index 0000000000000..9a43a05495677
--- /dev/null
+++ b/ext/simplexml/tests/simplexml_load_stream_memory.phpt
@@ -0,0 +1,35 @@
+--TEST--
+simplexml_load_stream() - from memory stream
+--EXTENSIONS--
+simplexml
+--FILE--
+");
+rewind($tmp);
+$sxe1 = simplexml_load_stream($tmp);
+rewind($tmp);
+$sxe2 = simplexml_load_stream($tmp, document_uri: 'http://example.com');
+fclose($tmp);
+
+var_dump($sxe1, $sxe2);
+
+?>
+--EXPECTF--
+object(SimpleXMLElement)#%d (2) {
+ ["child1"]=>
+ object(SimpleXMLElement)#%d (0) {
+ }
+ ["child2"]=>
+ object(SimpleXMLElement)#%d (0) {
+ }
+}
+object(SimpleXMLElement)#%d (2) {
+ ["child1"]=>
+ object(SimpleXMLElement)#%d (0) {
+ }
+ ["child2"]=>
+ object(SimpleXMLElement)#%d (0) {
+ }
+}
diff --git a/ext/simplexml/tests/simplexml_load_stream_memory_with_encoding.phpt b/ext/simplexml/tests/simplexml_load_stream_memory_with_encoding.phpt
new file mode 100644
index 0000000000000..eff3159b1059b
--- /dev/null
+++ b/ext/simplexml/tests/simplexml_load_stream_memory_with_encoding.phpt
@@ -0,0 +1,27 @@
+--TEST--
+simplexml_load_stream() - from memory stream with encoding
+--EXTENSIONS--
+simplexml
+--FILE--
+ééé');
+rewind($tmp);
+$sxe1 = simplexml_load_stream($tmp, encoding: 'UTF-8');
+rewind($tmp);
+$sxe2 = simplexml_load_stream($tmp);
+fclose($tmp);
+
+var_dump($sxe1, $sxe2);
+
+?>
+--EXPECTF--
+object(SimpleXMLElement)#%d (1) {
+ [0]=>
+ string(6) "ééé"
+}
+object(SimpleXMLElement)#%d (1) {
+ [0]=>
+ string(18) "テゥテゥテゥ"
+}
From 1e55758af38338760a2546653da4ee6b059f1c70 Mon Sep 17 00:00:00 2001
From: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
Date: Thu, 30 Jan 2025 21:12:37 +0100
Subject: [PATCH 6/6] Prototype XSLTProcessor::transformToStream()
---
ext/xsl/php_xsl.stub.php | 6 ++
ext/xsl/php_xsl_arginfo.h | 10 ++-
ext/xsl/tests/transform_to_stream.phpt | 38 ++++++++++++
.../transform_to_stream_broken_stream.phpt | 52 ++++++++++++++++
ext/xsl/tests/transform_to_stream_errors.phpt | 19 ++++++
ext/xsl/xsltprocessor.c | 61 +++++++++++++++++++
6 files changed, 185 insertions(+), 1 deletion(-)
create mode 100644 ext/xsl/tests/transform_to_stream.phpt
create mode 100644 ext/xsl/tests/transform_to_stream_broken_stream.phpt
create mode 100644 ext/xsl/tests/transform_to_stream_errors.phpt
diff --git a/ext/xsl/php_xsl.stub.php b/ext/xsl/php_xsl.stub.php
index 24da81b7d71b8..a213755bd64ca 100644
--- a/ext/xsl/php_xsl.stub.php
+++ b/ext/xsl/php_xsl.stub.php
@@ -91,6 +91,12 @@ public function importStylesheet(object $stylesheet): bool {}
*/
public function transformToDoc(object $document, ?string $returnClass = null): object|false {}
+ /**
+ * @param DOMDocument|Dom\Document|SimpleXMLElement $document
+ * @param resource $stream
+ */
+ public function transformToStream(object $document, $stream, ?string $encoding = null): int {}
+
/**
* @param DOMDocument|Dom\Document|SimpleXMLElement $document
* @tentative-return-type
diff --git a/ext/xsl/php_xsl_arginfo.h b/ext/xsl/php_xsl_arginfo.h
index d040928197f65..74a849a42d40a 100644
--- a/ext/xsl/php_xsl_arginfo.h
+++ b/ext/xsl/php_xsl_arginfo.h
@@ -1,5 +1,5 @@
/* This is a generated file, edit the .stub.php file instead.
- * Stub hash: 5b4ce3f5f7dee60bde803b3c2eb3994777f56914 */
+ * Stub hash: dff0596dc4f7ebeb24cab180f8853c9b15c5d065 */
ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_TYPE_INFO_EX(arginfo_class_XSLTProcessor_importStylesheet, 0, 1, _IS_BOOL, 0)
ZEND_ARG_TYPE_INFO(0, stylesheet, IS_OBJECT, 0)
@@ -10,6 +10,12 @@ ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_TYPE_MASK_EX(arginfo_class_XSLTProcessor_tr
ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, returnClass, IS_STRING, 1, "null")
ZEND_END_ARG_INFO()
+ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_class_XSLTProcessor_transformToStream, 0, 2, IS_LONG, 0)
+ ZEND_ARG_TYPE_INFO(0, document, IS_OBJECT, 0)
+ ZEND_ARG_INFO(0, stream)
+ ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, encoding, IS_STRING, 1, "null")
+ZEND_END_ARG_INFO()
+
ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_TYPE_INFO_EX(arginfo_class_XSLTProcessor_transformToUri, 0, 2, IS_LONG, 0)
ZEND_ARG_TYPE_INFO(0, document, IS_OBJECT, 0)
ZEND_ARG_TYPE_INFO(0, uri, IS_STRING, 0)
@@ -61,6 +67,7 @@ ZEND_END_ARG_INFO()
ZEND_METHOD(XSLTProcessor, importStylesheet);
ZEND_METHOD(XSLTProcessor, transformToDoc);
+ZEND_METHOD(XSLTProcessor, transformToStream);
ZEND_METHOD(XSLTProcessor, transformToUri);
ZEND_METHOD(XSLTProcessor, transformToXml);
ZEND_METHOD(XSLTProcessor, setParameter);
@@ -76,6 +83,7 @@ ZEND_METHOD(XSLTProcessor, getSecurityPrefs);
static const zend_function_entry class_XSLTProcessor_methods[] = {
ZEND_ME(XSLTProcessor, importStylesheet, arginfo_class_XSLTProcessor_importStylesheet, ZEND_ACC_PUBLIC)
ZEND_ME(XSLTProcessor, transformToDoc, arginfo_class_XSLTProcessor_transformToDoc, ZEND_ACC_PUBLIC)
+ ZEND_ME(XSLTProcessor, transformToStream, arginfo_class_XSLTProcessor_transformToStream, ZEND_ACC_PUBLIC)
ZEND_ME(XSLTProcessor, transformToUri, arginfo_class_XSLTProcessor_transformToUri, ZEND_ACC_PUBLIC)
ZEND_ME(XSLTProcessor, transformToXml, arginfo_class_XSLTProcessor_transformToXml, ZEND_ACC_PUBLIC)
ZEND_ME(XSLTProcessor, setParameter, arginfo_class_XSLTProcessor_setParameter, ZEND_ACC_PUBLIC)
diff --git a/ext/xsl/tests/transform_to_stream.phpt b/ext/xsl/tests/transform_to_stream.phpt
new file mode 100644
index 0000000000000..fbac511d281ac
--- /dev/null
+++ b/ext/xsl/tests/transform_to_stream.phpt
@@ -0,0 +1,38 @@
+--TEST--
+XSLTProcessor::transformToStream() function - normal
+--EXTENSIONS--
+xsl
+--FILE--
+importStylesheet($xsl);
+
+$stream = fopen('php://output', 'w');
+$written = $proc->transformToStream($dom, $stream);
+fclose($stream);
+
+echo "\n";
+var_dump($written);
+
+$stream = fopen('php://output', 'w');
+$written = $proc->transformToStream($dom, $stream, 'iso-8859-1');
+fclose($stream);
+
+echo "\n";
+var_dump($written);
+?>
+--EXPECT--
+
+bar
+a1 b1 c1
+a2 c2
+ä3 b3 c3
+
+int(120)
+
+bar
+a1 b1 c1
+a2 c2
+ä3 b3 c3
+
+int(119)
diff --git a/ext/xsl/tests/transform_to_stream_broken_stream.phpt b/ext/xsl/tests/transform_to_stream_broken_stream.phpt
new file mode 100644
index 0000000000000..1d69502d4b395
--- /dev/null
+++ b/ext/xsl/tests/transform_to_stream_broken_stream.phpt
@@ -0,0 +1,52 @@
+--TEST--
+XSLTProcessor::transformToStream() function - broken stream
+--EXTENSIONS--
+xsl
+--FILE--
+first) {
+ $this->first = false;
+ var_dump($data);
+ }
+ throw new Error("broken");
+ }
+
+ public function stream_open(string $path, string $mode, int $options, ?string &$opened_path) {
+ return true;
+ }
+
+ public function stream_close(): void {
+ }
+
+ public function stream_eof(): bool {
+ return !$this->first;
+ }
+}
+
+stream_wrapper_register("foo", MyStream::class);
+
+include("prepare.inc");
+$proc->importStylesheet($xsl);
+
+$stream = fopen('foo://', 'w');
+stream_set_chunk_size($stream, 4);
+$written = $proc->transformToStream($dom, $stream);
+fclose($stream);
+
+echo "\n";
+var_dump($written);
+?>
+--EXPECTF--
+string(4) "stream_write('transformToStream(Object(DOMDocument), Resource id #%d)
+#2 {main}
+ thrown in %s on line %d
diff --git a/ext/xsl/tests/transform_to_stream_errors.phpt b/ext/xsl/tests/transform_to_stream_errors.phpt
new file mode 100644
index 0000000000000..a448ed3868a22
--- /dev/null
+++ b/ext/xsl/tests/transform_to_stream_errors.phpt
@@ -0,0 +1,19 @@
+--TEST--
+XSLTProcessor::transformToStream() function - errors
+--EXTENSIONS--
+xsl
+--FILE--
+importStylesheet($xsl);
+
+$stream = fopen('php://output', 'w');
+try {
+ $proc->transformToStream($dom, $stream, 'nope');
+} catch (ValueError $e) {
+ echo $e->getMessage(), "\n";
+}
+fclose($stream);
+?>
+--EXPECT--
+XSLTProcessor::transformToStream(): Argument #3 ($encoding) is not a valid document encoding
diff --git a/ext/xsl/xsltprocessor.c b/ext/xsl/xsltprocessor.c
index ea0f9232aced4..e7fe4cfbafb8f 100644
--- a/ext/xsl/xsltprocessor.c
+++ b/ext/xsl/xsltprocessor.c
@@ -21,6 +21,7 @@
#include "php.h"
#include "php_xsl.h"
+#include "Zend/zend_exceptions.h"
#include <libxslt/variables.h>
#include "ext/libxml/php_libxml.h"
#include "ext/dom/namespace_compat.h"
@@ -481,6 +482,66 @@ PHP_METHOD(XSLTProcessor, transformToDoc)
}
/* }}} end XSLTProcessor::transformToDoc */
+static int xsl_stream_write(void *context, const char *buffer, int len) /* Write callback passed to xmlOutputBufferCreateIO() by transformToStream(); context is the stream's zend_resource. */
+{
+ zend_resource *resource = context;
+ if (EXPECTED(resource->ptr)) { /* NOTE(review): presumably ptr is NULL once the stream has been closed - confirm */
+ php_stream *stream = resource->ptr;
+ return php_stream_write(stream, buffer, len);
+ }
+ return -1; /* no stream behind the resource: report a write error to the output buffer */
+}
+
+PHP_METHOD(XSLTProcessor, transformToStream) /* Apply this processor's stylesheet to a document and write the result to a stream resource; returns the xsltSaveResultTo() result, throws on failure. */
+{
+ zval *docp, *stream_zv;
+ php_stream *stream;
+ const char *encoding = NULL;
+ size_t encoding_len;
+ if (zend_parse_parameters(ZEND_NUM_ARGS(), "or|p!", &docp, &stream_zv, &encoding, &encoding_len) == FAILURE) {
+ RETURN_THROWS();
+ }
+
+ php_stream_from_res(stream, Z_RES_P(stream_zv));
+
+ xmlCharEncodingHandlerPtr handler = NULL; /* stays NULL when no encoding argument was given */
+ if (encoding) {
+ handler = xmlFindCharEncodingHandler(encoding);
+ if (UNEXPECTED(!handler)) {
+ zend_argument_value_error(3, "is not a valid document encoding");
+ RETURN_THROWS();
+ }
+ }
+
+ xsl_object *intern = Z_XSL_P(ZEND_THIS);
+ xsltStylesheetPtr sheetp = intern->ptr;
+
+ xmlOutputBufferPtr out = xmlOutputBufferCreateIO(xsl_stream_write, NULL, stream->res, handler); /* no close callback is registered; the zend_resource is the write context */
+ if (UNEXPECTED(!out)) {
+ zend_throw_error(zend_ce_exception, "Failed to create output buffer");
+ RETURN_THROWS();
+ }
+
+ xmlDocPtr newdocp = php_xsl_apply_stylesheet(ZEND_THIS, intern, sheetp, docp);
+
+ int ret = -1; /* stays negative when the transformation produced no document */
+ if (newdocp) {
+ ret = xsltSaveResultTo(out, newdocp, sheetp);
+ xmlFreeDoc(newdocp);
+ }
+
+ xmlOutputBufferClose(out); /* closed on the success and failure paths alike */
+
+ if (ret < 0) {
+ if (!EG(exception)) { /* keep an exception that is already pending instead of replacing it */
+ zend_throw_error(zend_ce_exception, "Failed to transform and write document");
+ }
+ RETURN_THROWS();
+ }
+
+ RETURN_LONG(ret);
+}
+
/* {{{ */
PHP_METHOD(XSLTProcessor, transformToUri)
{