From a284b6a279ea66d42760058fe8a0aee32f947c08 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Sun, 12 Jan 2025 16:41:55 +0100 Subject: [PATCH 1/6] Factor out HTML document creation from stream to separate function --- ext/dom/html_document.c | 94 +++++++++++++++++++++++------------------ 1 file changed, 52 insertions(+), 42 deletions(-) diff --git a/ext/dom/html_document.c b/ext/dom/html_document.c index 41624bfe172fd..546a8f3fc538c 100644 --- a/ext/dom/html_document.c +++ b/ext/dom/html_document.c @@ -994,34 +994,17 @@ PHP_METHOD(Dom_HTMLDocument, createFromString) RETURN_THROWS(); } -PHP_METHOD(Dom_HTMLDocument, createFromFile) +static void dom_html_document_create_from_stream( + zval *return_value, + php_stream *stream, + zend_long options, + const char *override_encoding, + size_t override_encoding_len, + zend_string *opened_path, + const char *filename +) { - const char *filename, *override_encoding = NULL; php_dom_private_data *private_data = NULL; - size_t filename_len, override_encoding_len; - zend_long options = 0; - php_stream *stream = NULL; - if (zend_parse_parameters( - ZEND_NUM_ARGS(), - "p|lp!", - &filename, - &filename_len, - &options, - &override_encoding, - &override_encoding_len - ) == FAILURE) { - RETURN_THROWS(); - } - - /* See php_libxml_streams_IO_open_wrapper(), apparently this caused issues in the past. 
*/ - if (strstr(filename, "%00")) { - zend_argument_value_error(1, "must not contain percent-encoded NUL bytes"); - RETURN_THROWS(); - } - - if (!check_options_validity(2, options)) { - RETURN_THROWS(); - } dom_lexbor_libxml2_bridge_application_data application_data; application_data.input_name = filename; @@ -1058,15 +1041,6 @@ PHP_METHOD(Dom_HTMLDocument, createFromFile) dom_setup_parser_encoding_manually((const lxb_char_t *) buf, encoding_data, &decoding_encoding_ctx, &application_data); } - zend_string *opened_path = NULL; - stream = php_stream_open_wrapper_ex(filename, "rb", REPORT_ERRORS, &opened_path, php_libxml_get_stream_context()); - if (!stream) { - if (!EG(exception)) { - zend_throw_exception_ex(NULL, 0, "Cannot open file '%s'", filename); - } - RETURN_THROWS(); - } - /* MIME sniff */ if (should_determine_encoding_implicitly) { zend_string *charset = php_libxml_sniff_charset_from_stream(stream); @@ -1192,12 +1166,6 @@ PHP_METHOD(Dom_HTMLDocument, createFromFile) lxml_doc->URL = xmlStrdup((const xmlChar *) filename); } - if (opened_path != NULL) { - zend_string_release_ex(opened_path, false); - } - php_stream_close(stream); - stream = NULL; - dom_object *intern = php_dom_instantiate_object_helper( return_value, dom_html_document_class_entry, @@ -1216,10 +1184,52 @@ PHP_METHOD(Dom_HTMLDocument, createFromFile) php_dom_private_data_destroy(private_data); } lxb_html_document_destroy(document); - php_stream_close(stream); +} + +PHP_METHOD(Dom_HTMLDocument, createFromFile) +{ + const char *filename, *override_encoding = NULL; + size_t filename_len, override_encoding_len; + zend_long options = 0; + if (zend_parse_parameters( + ZEND_NUM_ARGS(), + "p|lp!", + &filename, + &filename_len, + &options, + &override_encoding, + &override_encoding_len + ) == FAILURE) { + RETURN_THROWS(); + } + + /* See php_libxml_streams_IO_open_wrapper(), apparently this caused issues in the past. 
*/ + if (strstr(filename, "%00")) { + zend_argument_value_error(1, "must not contain percent-encoded NUL bytes"); + RETURN_THROWS(); + } + + if (!check_options_validity(2, options)) { + RETURN_THROWS(); + } + + zend_string *opened_path = NULL; + php_stream *stream = php_stream_open_wrapper_ex(filename, "rb", REPORT_ERRORS, &opened_path, php_libxml_get_stream_context()); + if (!stream) { + if (!EG(exception)) { + zend_throw_exception_ex(NULL, 0, "Cannot open file '%s'", filename); + } + RETURN_THROWS(); + } + + dom_html_document_create_from_stream( + return_value, stream, options, override_encoding, override_encoding_len, opened_path, filename + ); + if (opened_path != NULL) { zend_string_release_ex(opened_path, false); } + php_stream_close(stream); } static zend_result dom_write_output_smart_str(void *ctx, const char *buf, size_t size) From d987b6d3ba2dea3193264a1c37bc151145f80df5 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Sun, 12 Jan 2025 17:31:20 +0100 Subject: [PATCH 2/6] Implement Dom\HTMLDocument::createFromStream() --- ext/dom/html_document.c | 32 +++++++++++++- ext/dom/php_dom.stub.php | 3 ++ ext/dom/php_dom_arginfo.h | 11 ++++- .../parser/HTMLDocument_createFromStream.phpt | 27 ++++++++++++ .../HTMLDocument_createFromStream_broken.phpt | 44 +++++++++++++++++++ 5 files changed, 115 insertions(+), 2 deletions(-) create mode 100644 ext/dom/tests/modern/html/parser/HTMLDocument_createFromStream.phpt create mode 100644 ext/dom/tests/modern/html/parser/HTMLDocument_createFromStream_broken.phpt diff --git a/ext/dom/html_document.c b/ext/dom/html_document.c index 546a8f3fc538c..53fbacd67e87f 100644 --- a/ext/dom/html_document.c +++ b/ext/dom/html_document.c @@ -1004,10 +1004,12 @@ static void dom_html_document_create_from_stream( const char *filename ) { + ZEND_ASSERT(stream != NULL); + php_dom_private_data *private_data = NULL; dom_lexbor_libxml2_bridge_application_data application_data; - 
application_data.input_name = filename; + application_data.input_name = filename ? filename : "Entity"; application_data.current_total_offset = 0; application_data.html_no_implied = options & HTML_PARSE_NOIMPLIED; dom_reset_line_column_cache(&application_data.cache_tokenizer); @@ -1232,6 +1234,34 @@ PHP_METHOD(Dom_HTMLDocument, createFromFile) php_stream_close(stream); } +PHP_METHOD(Dom_HTMLDocument, createFromStream) +{ + php_stream *stream; + zval *stream_zv; + const char *document_uri = NULL; + const char *override_encoding = NULL; + size_t override_encoding_len, document_uri_len; + zend_long options = 0; + if (zend_parse_parameters( + ZEND_NUM_ARGS(), + "r|p!lp!", + &stream_zv, + &document_uri, + &document_uri_len, + &options, + &override_encoding, + &override_encoding_len + ) == FAILURE) { + RETURN_THROWS(); + } + + php_stream_from_res(stream, Z_RES_P(stream_zv)); + + dom_html_document_create_from_stream( + return_value, stream, options, override_encoding, override_encoding_len, NULL, document_uri + ); +} + static zend_result dom_write_output_smart_str(void *ctx, const char *buf, size_t size) { smart_str_appendl((smart_str *) ctx, buf, size); diff --git a/ext/dom/php_dom.stub.php b/ext/dom/php_dom.stub.php index 43d26ec7a3c7d..00c693d3d4f81 100644 --- a/ext/dom/php_dom.stub.php +++ b/ext/dom/php_dom.stub.php @@ -2042,6 +2042,9 @@ public static function createEmpty(string $encoding = "UTF-8"): HTMLDocument {} public static function createFromFile(string $path, int $options = 0, ?string $overrideEncoding = null): HTMLDocument {} + /** @param resource $stream */ + public static function createFromStream($stream, ?string $documentURI = null, int $options = 0, ?string $overrideEncoding = null): HTMLDocument {} + public static function createFromString(string $source, int $options = 0, ?string $overrideEncoding = null): HTMLDocument {} /** @implementation-alias Dom\XMLDocument::saveXml */ diff --git a/ext/dom/php_dom_arginfo.h b/ext/dom/php_dom_arginfo.h index 
5c21b909b0e18..a4cc3e762a603 100644 --- a/ext/dom/php_dom_arginfo.h +++ b/ext/dom/php_dom_arginfo.h @@ -1,5 +1,5 @@ /* This is a generated file, edit the .stub.php file instead. - * Stub hash: 0fcee2fa666dc88faf084578dde157409a6f5594 */ + * Stub hash: 8018206ec17368080a8f58e03f3e4be53cde2e34 */ ZEND_BEGIN_ARG_WITH_RETURN_OBJ_TYPE_MASK_EX(arginfo_dom_import_simplexml, 0, 1, DOMAttr|DOMElement, 0) ZEND_ARG_TYPE_INFO(0, node, IS_OBJECT, 0) @@ -1005,6 +1005,13 @@ ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(arginfo_class_Dom_HTMLDocument_createFrom ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, overrideEncoding, IS_STRING, 1, "null") ZEND_END_ARG_INFO() +ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(arginfo_class_Dom_HTMLDocument_createFromStream, 0, 1, Dom\\HTMLDocument, 0) + ZEND_ARG_INFO(0, stream) + ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, documentURI, IS_STRING, 1, "null") + ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, options, IS_LONG, 0, "0") + ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, overrideEncoding, IS_STRING, 1, "null") +ZEND_END_ARG_INFO() + ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(arginfo_class_Dom_HTMLDocument_createFromString, 0, 1, Dom\\HTMLDocument, 0) ZEND_ARG_TYPE_INFO(0, source, IS_STRING, 0) ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, options, IS_LONG, 0, "0") @@ -1302,6 +1309,7 @@ ZEND_METHOD(Dom_Document, registerNodeClass); ZEND_METHOD(Dom_Document, importLegacyNode); ZEND_METHOD(Dom_HTMLDocument, createEmpty); ZEND_METHOD(Dom_HTMLDocument, createFromFile); +ZEND_METHOD(Dom_HTMLDocument, createFromStream); ZEND_METHOD(Dom_HTMLDocument, createFromString); ZEND_METHOD(Dom_XMLDocument, saveXml); ZEND_METHOD(Dom_HTMLDocument, saveHtml); @@ -1752,6 +1760,7 @@ static const zend_function_entry class_Dom_Document_methods[] = { static const zend_function_entry class_Dom_HTMLDocument_methods[] = { ZEND_ME(Dom_HTMLDocument, createEmpty, arginfo_class_Dom_HTMLDocument_createEmpty, ZEND_ACC_PUBLIC|ZEND_ACC_STATIC) ZEND_ME(Dom_HTMLDocument, createFromFile, 
arginfo_class_Dom_HTMLDocument_createFromFile, ZEND_ACC_PUBLIC|ZEND_ACC_STATIC) + ZEND_ME(Dom_HTMLDocument, createFromStream, arginfo_class_Dom_HTMLDocument_createFromStream, ZEND_ACC_PUBLIC|ZEND_ACC_STATIC) ZEND_ME(Dom_HTMLDocument, createFromString, arginfo_class_Dom_HTMLDocument_createFromString, ZEND_ACC_PUBLIC|ZEND_ACC_STATIC) ZEND_RAW_FENTRY("saveXml", zim_Dom_XMLDocument_saveXml, arginfo_class_Dom_HTMLDocument_saveXml, ZEND_ACC_PUBLIC, NULL, NULL) ZEND_RAW_FENTRY("saveXmlFile", zim_DOMDocument_save, arginfo_class_Dom_HTMLDocument_saveXmlFile, ZEND_ACC_PUBLIC, NULL, NULL) diff --git a/ext/dom/tests/modern/html/parser/HTMLDocument_createFromStream.phpt b/ext/dom/tests/modern/html/parser/HTMLDocument_createFromStream.phpt new file mode 100644 index 0000000000000..ef223cf4531a2 --- /dev/null +++ b/ext/dom/tests/modern/html/parser/HTMLDocument_createFromStream.phpt @@ -0,0 +1,27 @@ +--TEST-- +Dom\HTMLDocument::createFromStream() - from memory +--EXTENSIONS-- +dom +--FILE-- +

Hello world

"); +rewind($tmp); +$dom1 = Dom\HTMLDocument::createFromStream($tmp); +rewind($tmp); +$dom2 = Dom\HTMLDocument::createFromStream($tmp, "http://example.com"); +fclose($tmp); + +var_dump($dom1->documentURI); +var_dump($dom2->documentURI); + +echo $dom1->saveHtml(), "\n"; +echo $dom2->saveHtml(), "\n"; + +?> +--EXPECT-- +string(11) "about:blank" +string(18) "http://example.com" +

Hello world

+

Hello world

diff --git a/ext/dom/tests/modern/html/parser/HTMLDocument_createFromStream_broken.phpt b/ext/dom/tests/modern/html/parser/HTMLDocument_createFromStream_broken.phpt new file mode 100644 index 0000000000000..c02d9cf1a8069 --- /dev/null +++ b/ext/dom/tests/modern/html/parser/HTMLDocument_createFromStream_broken.phpt @@ -0,0 +1,44 @@ +--TEST-- +Dom\HTMLDocument::createFromStream() - broken stream +--EXTENSIONS-- +dom +--FILE-- +first) { + $this->first = false; + return "

Hello"; + } + throw new Error("broken"); + } + + public function stream_open(string $path, string $mode, int $options, ?string &$opened_path) { + return true; + } + + public function stream_close(): void { + } + + public function stream_eof(): bool { + return !$this->first; + } +} + +stream_wrapper_register("foo", MyStream::class); + +$tmp = fopen("foo://", "r+"); +try { + $dom = Dom\HTMLDocument::createFromStream($tmp); +} catch (Error $e) { + echo $e->getMessage(), "\n"; +} +fclose($tmp); + +?> +--EXPECT-- +broken From 584d38c1c87a2e6524fc44cc69503efdeb77a571 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Sun, 19 Jan 2025 14:58:39 +0100 Subject: [PATCH 3/6] Add XMLDocument::createFromStream() --- ext/dom/document.c | 44 ++++--- ext/dom/php_dom.h | 15 ++- ext/dom/php_dom.stub.php | 3 + ext/dom/php_dom_arginfo.h | 11 +- .../xml/XMLDocument_createFromStream.phpt | 29 +++++ .../XMLDocument_createFromStream_broken.phpt | 44 +++++++ ext/dom/xml_document.c | 119 ++++++++++++------ 7 files changed, 206 insertions(+), 59 deletions(-) create mode 100644 ext/dom/tests/modern/xml/XMLDocument_createFromStream.phpt create mode 100644 ext/dom/tests/modern/xml/XMLDocument_createFromStream_broken.phpt diff --git a/ext/dom/document.c b/ext/dom/document.c index 378f076c56068..aad6e202fdf9d 100644 --- a/ext/dom/document.c +++ b/ext/dom/document.c @@ -1345,7 +1345,17 @@ const char *dom_get_valid_file_path(const char *source, char *resolved_path, int } /* }}} */ -xmlDocPtr dom_document_parser(zval *id, dom_load_mode mode, const char *source, size_t source_len, size_t options, xmlCharEncodingHandlerPtr encoding) /* {{{ */ +static int dom_stream_read(void *context, char *buffer, int len) +{ + zend_resource *resource = context; + if (EXPECTED(resource->ptr)) { + php_stream *stream = resource->ptr; + return php_stream_read(stream, buffer, len); + } + return -1; +} + +xmlDocPtr dom_document_parser(zval *id, dom_load_mode mode, 
dom_source_union source, size_t options, xmlCharEncodingHandlerPtr encoding, const char *override_document_uri) /* {{{ */ { xmlDocPtr ret; xmlParserCtxtPtr ctxt = NULL; @@ -1371,16 +1381,18 @@ xmlDocPtr dom_document_parser(zval *id, dom_load_mode mode, const char *source, xmlInitParser(); if (mode == DOM_LOAD_FILE) { - if (CHECK_NULL_PATH(source, source_len)) { + if (CHECK_NULL_PATH(source.str, source.str_len)) { zend_argument_value_error(1, "must not contain any null bytes"); return NULL; } - const char *file_dest = dom_get_valid_file_path(source, resolved_path, MAXPATHLEN); + const char *file_dest = dom_get_valid_file_path(source.str, resolved_path, MAXPATHLEN); if (file_dest) { ctxt = xmlCreateFileParserCtxt(file_dest); } + } else if (mode == DOM_LOAD_STRING) { + ctxt = xmlCreateMemoryParserCtxt(source.str, source.str_len); } else { - ctxt = xmlCreateMemoryParserCtxt(source, source_len); + ctxt = xmlCreateIOParserCtxt(NULL, NULL, dom_stream_read, NULL, source.stream->res, XML_CHAR_ENCODING_NONE); } if (ctxt == NULL) { @@ -1393,7 +1405,7 @@ xmlDocPtr dom_document_parser(zval *id, dom_load_mode mode, const char *source, } /* If loading from memory, we need to set the base directory for the document */ - if (mode != DOM_LOAD_FILE) { + if (mode == DOM_LOAD_STRING) { #ifdef HAVE_GETCWD directory = VCWD_GETCWD(resolved_path, MAXPATHLEN); #elif defined(HAVE_GETWD) @@ -1410,6 +1422,11 @@ xmlDocPtr dom_document_parser(zval *id, dom_load_mode mode, const char *source, } ctxt->directory = (char *) xmlCanonicPath((const xmlChar *) resolved_path); } + } else if (override_document_uri) { + if(ctxt->directory != NULL) { + xmlFree(ctxt->directory); + } + ctxt->directory = (char *) xmlCanonicPath((const xmlChar *) override_document_uri); } ctxt->vctxt.error = php_libxml_ctx_error; @@ -1507,21 +1524,20 @@ static void php_dom_finish_loading_document(zval *this, zval *return_value, xmlD RETURN_TRUE; } -static void dom_parse_document(INTERNAL_FUNCTION_PARAMETERS, int mode) +static 
void dom_legacy_parse_document(INTERNAL_FUNCTION_PARAMETERS, int mode) { - char *source; - size_t source_len; + dom_source_union source; zend_long options = 0; - if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|l", &source, &source_len, &options) == FAILURE) { + if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|l", &source.str, &source.str_len, &options) == FAILURE) { RETURN_THROWS(); } - if (!source_len) { + if (!source.str_len) { zend_argument_must_not_be_empty_error(1); RETURN_THROWS(); } - if (ZEND_SIZE_T_INT_OVFL(source_len)) { + if (ZEND_SIZE_T_INT_OVFL(source.str_len)) { php_error_docref(NULL, E_WARNING, "Input string is too long"); RETURN_FALSE; } @@ -1530,7 +1546,7 @@ static void dom_parse_document(INTERNAL_FUNCTION_PARAMETERS, int mode) RETURN_FALSE; } - xmlDocPtr newdoc = dom_document_parser(ZEND_THIS, mode, source, source_len, options, NULL); + xmlDocPtr newdoc = dom_document_parser(ZEND_THIS, mode, source, options, NULL, NULL); if (newdoc == DOM_DOCUMENT_MALFORMED) { newdoc = NULL; } @@ -1542,7 +1558,7 @@ Since: DOM Level 3 */ PHP_METHOD(DOMDocument, load) { - dom_parse_document(INTERNAL_FUNCTION_PARAM_PASSTHRU, DOM_LOAD_FILE); + dom_legacy_parse_document(INTERNAL_FUNCTION_PARAM_PASSTHRU, DOM_LOAD_FILE); } /* }}} end dom_document_load */ @@ -1551,7 +1567,7 @@ Since: DOM Level 3 */ PHP_METHOD(DOMDocument, loadXML) { - dom_parse_document(INTERNAL_FUNCTION_PARAM_PASSTHRU, DOM_LOAD_STRING); + dom_legacy_parse_document(INTERNAL_FUNCTION_PARAM_PASSTHRU, DOM_LOAD_STRING); } /* }}} end dom_document_loadxml */ diff --git a/ext/dom/php_dom.h b/ext/dom/php_dom.h index 1c21d8a64e1a9..a7ea77becb9a8 100644 --- a/ext/dom/php_dom.h +++ b/ext/dom/php_dom.h @@ -179,13 +179,22 @@ void dom_set_document_ref_pointers(xmlNodePtr node, php_libxml_ref_obj *document void dom_set_document_ref_pointers_attr(xmlAttrPtr attr, php_libxml_ref_obj *document); typedef enum { - DOM_LOAD_STRING = 0, - DOM_LOAD_FILE = 1, + DOM_LOAD_STRING, + DOM_LOAD_FILE, + DOM_LOAD_STREAM, } dom_load_mode; 
+typedef union { + struct { + const char *str; + size_t str_len; + }; + php_stream *stream; +} dom_source_union; + #define DOM_DOCUMENT_MALFORMED ((xmlDocPtr) -1) -xmlDocPtr dom_document_parser(zval *id, dom_load_mode mode, const char *source, size_t source_len, size_t options, xmlCharEncodingHandlerPtr encoding); +xmlDocPtr dom_document_parser(zval *id, dom_load_mode mode, dom_source_union source, size_t options, xmlCharEncodingHandlerPtr encoding, const char *override_document_uri); /* parentnode */ void dom_parent_node_prepend(dom_object *context, zval *nodes, uint32_t nodesc); diff --git a/ext/dom/php_dom.stub.php b/ext/dom/php_dom.stub.php index 00c693d3d4f81..31c4ba26a6d2f 100644 --- a/ext/dom/php_dom.stub.php +++ b/ext/dom/php_dom.stub.php @@ -2068,6 +2068,9 @@ public static function createEmpty(string $version = "1.0", string $encoding = " public static function createFromFile(string $path, int $options = 0, ?string $overrideEncoding = null): XMLDocument {} + /** @param resource $stream */ + public static function createFromStream($stream, ?string $documentURI = null, int $options = 0, ?string $overrideEncoding = null): XMLDocument {} + public static function createFromString(string $source, int $options = 0, ?string $overrideEncoding = null): XMLDocument {} /** diff --git a/ext/dom/php_dom_arginfo.h b/ext/dom/php_dom_arginfo.h index a4cc3e762a603..d0038885d7d46 100644 --- a/ext/dom/php_dom_arginfo.h +++ b/ext/dom/php_dom_arginfo.h @@ -1,5 +1,5 @@ /* This is a generated file, edit the .stub.php file instead. 
- * Stub hash: 8018206ec17368080a8f58e03f3e4be53cde2e34 */ + * Stub hash: 0a3830de3cf55ef30f22758db6825894f31cc819 */ ZEND_BEGIN_ARG_WITH_RETURN_OBJ_TYPE_MASK_EX(arginfo_dom_import_simplexml, 0, 1, DOMAttr|DOMElement, 0) ZEND_ARG_TYPE_INFO(0, node, IS_OBJECT, 0) @@ -1052,6 +1052,13 @@ ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(arginfo_class_Dom_XMLDocument_createFromF ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, overrideEncoding, IS_STRING, 1, "null") ZEND_END_ARG_INFO() +ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(arginfo_class_Dom_XMLDocument_createFromStream, 0, 1, Dom\\XMLDocument, 0) + ZEND_ARG_INFO(0, stream) + ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, documentURI, IS_STRING, 1, "null") + ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, options, IS_LONG, 0, "0") + ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, overrideEncoding, IS_STRING, 1, "null") +ZEND_END_ARG_INFO() + ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(arginfo_class_Dom_XMLDocument_createFromString, 0, 1, Dom\\XMLDocument, 0) ZEND_ARG_TYPE_INFO(0, source, IS_STRING, 0) ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, options, IS_LONG, 0, "0") @@ -1319,6 +1326,7 @@ ZEND_METHOD(Dom_HTMLDocument, debugGetTemplateCount); #endif ZEND_METHOD(Dom_XMLDocument, createEmpty); ZEND_METHOD(Dom_XMLDocument, createFromFile); +ZEND_METHOD(Dom_XMLDocument, createFromStream); ZEND_METHOD(Dom_XMLDocument, createFromString); ZEND_METHOD(Dom_XMLDocument, xinclude); ZEND_METHOD(Dom_TokenList, item); @@ -1775,6 +1783,7 @@ static const zend_function_entry class_Dom_HTMLDocument_methods[] = { static const zend_function_entry class_Dom_XMLDocument_methods[] = { ZEND_ME(Dom_XMLDocument, createEmpty, arginfo_class_Dom_XMLDocument_createEmpty, ZEND_ACC_PUBLIC|ZEND_ACC_STATIC) ZEND_ME(Dom_XMLDocument, createFromFile, arginfo_class_Dom_XMLDocument_createFromFile, ZEND_ACC_PUBLIC|ZEND_ACC_STATIC) + ZEND_ME(Dom_XMLDocument, createFromStream, arginfo_class_Dom_XMLDocument_createFromStream, ZEND_ACC_PUBLIC|ZEND_ACC_STATIC) ZEND_ME(Dom_XMLDocument, createFromString, 
arginfo_class_Dom_XMLDocument_createFromString, ZEND_ACC_PUBLIC|ZEND_ACC_STATIC) ZEND_RAW_FENTRY("createEntityReference", zim_DOMDocument_createEntityReference, arginfo_class_Dom_XMLDocument_createEntityReference, ZEND_ACC_PUBLIC, NULL, NULL) ZEND_RAW_FENTRY("validate", zim_DOMDocument_validate, arginfo_class_Dom_XMLDocument_validate, ZEND_ACC_PUBLIC, NULL, NULL) diff --git a/ext/dom/tests/modern/xml/XMLDocument_createFromStream.phpt b/ext/dom/tests/modern/xml/XMLDocument_createFromStream.phpt new file mode 100644 index 0000000000000..55b5a4248b223 --- /dev/null +++ b/ext/dom/tests/modern/xml/XMLDocument_createFromStream.phpt @@ -0,0 +1,29 @@ +--TEST-- +Dom\XMLDocument::createFromStream() - from memory +--EXTENSIONS-- +dom +--FILE-- +"); +rewind($tmp); +$dom1 = Dom\XMLDocument::createFromStream($tmp); +rewind($tmp); +$dom2 = Dom\XMLDocument::createFromStream($tmp, "http://example.com"); +fclose($tmp); + +var_dump($dom1->documentURI); +var_dump($dom2->documentURI); + +echo $dom1->saveXml(), "\n"; +echo $dom2->saveXml(), "\n"; + +?> +--EXPECT-- +string(11) "about:blank" +string(18) "http://example.com" + + + + diff --git a/ext/dom/tests/modern/xml/XMLDocument_createFromStream_broken.phpt b/ext/dom/tests/modern/xml/XMLDocument_createFromStream_broken.phpt new file mode 100644 index 0000000000000..73871f04c7132 --- /dev/null +++ b/ext/dom/tests/modern/xml/XMLDocument_createFromStream_broken.phpt @@ -0,0 +1,44 @@ +--TEST-- +Dom\XMLDocument::createFromStream() - broken stream +--EXTENSIONS-- +dom +--FILE-- +first) { + $this->first = false; + return ""; + } + throw new Error("broken"); + } + + public function stream_open(string $path, string $mode, int $options, ?string &$opened_path) { + return true; + } + + public function stream_close(): void { + } + + public function stream_eof(): bool { + return !$this->first; + } +} + +stream_wrapper_register("foo", MyStream::class); + +$tmp = fopen("foo://", "r+"); +try { + $dom = Dom\XMLDocument::createFromStream($tmp); +} catch 
(Error $e) { + echo $e->getMessage(), "\n"; +} +fclose($tmp); + +?> +--EXPECT-- +broken diff --git a/ext/dom/xml_document.c b/ext/dom/xml_document.c index 2bd3d908d7093..56503e82ddb51 100644 --- a/ext/dom/xml_document.c +++ b/ext/dom/xml_document.c @@ -131,43 +131,8 @@ PHP_METHOD(Dom_XMLDocument, createEmpty) RETURN_THROWS(); } -static void load_from_helper(INTERNAL_FUNCTION_PARAMETERS, int mode) +static void load_from_helper(zval *return_value, int mode, dom_source_union source, size_t options, const char *override_encoding, const char *override_document_uri) { - const char *source, *override_encoding = NULL; - size_t source_len, override_encoding_len; - zend_long options = 0; - if (zend_parse_parameters( - ZEND_NUM_ARGS(), - "s|lp!", - &source, - &source_len, - &options, - &override_encoding, - &override_encoding_len - ) == FAILURE) { - RETURN_THROWS(); - } - - if (!source_len) { - zend_argument_value_error(1, "must not be empty"); - RETURN_THROWS(); - } - - if (ZEND_SIZE_T_INT_OVFL(source_len)) { - zend_argument_value_error(1, "is too long"); - RETURN_THROWS(); - } - - /* See php_libxml_streams_IO_open_wrapper(), apparently this caused issues in the past. 
*/ - if (mode == DOM_LOAD_FILE && strstr(source, "%00")) { - zend_argument_value_error(1, "must not contain percent-encoded NUL bytes"); - RETURN_THROWS(); - } - - if (!check_options_validity(2, options)) { - RETURN_THROWS(); - } - xmlCharEncodingHandlerPtr encoding = NULL; if (override_encoding != NULL) { encoding = xmlFindCharEncodingHandler(override_encoding); @@ -178,14 +143,14 @@ static void load_from_helper(INTERNAL_FUNCTION_PARAMETERS, int mode) options |= XML_PARSE_IGNORE_ENC; } - xmlDocPtr lxml_doc = dom_document_parser(NULL, mode, source, source_len, options, encoding); + xmlDocPtr lxml_doc = dom_document_parser(NULL, mode, source, options, encoding, override_document_uri); if (UNEXPECTED(lxml_doc == NULL || lxml_doc == DOM_DOCUMENT_MALFORMED)) { if (!EG(exception)) { if (lxml_doc == DOM_DOCUMENT_MALFORMED) { php_dom_throw_error_with_message(SYNTAX_ERR, "XML fragment is not well-formed", true); } else { if (mode == DOM_LOAD_FILE) { - zend_throw_exception_ex(NULL, 0, "Cannot open file '%s'", source); + zend_throw_exception_ex(NULL, 0, "Cannot open file '%s'", source.str); } else { php_dom_throw_error(INVALID_STATE_ERR, true); } @@ -200,7 +165,7 @@ static void load_from_helper(INTERNAL_FUNCTION_PARAMETERS, int mode) lxml_doc->encoding = xmlStrdup((const xmlChar *) "UTF-8"); } } - if (mode == DOM_LOAD_FILE && lxml_doc->URL != NULL) { + if ((mode == DOM_LOAD_FILE || mode == DOM_LOAD_STREAM) && lxml_doc->URL != NULL) { if (!php_is_stream_path((char *) lxml_doc->URL)) { /* Check for "file:/" instead of "file://" because of libxml2 quirk */ if (strncmp((const char *) lxml_doc->URL, "file:/", sizeof("file:/") - 1) != 0) { @@ -235,6 +200,49 @@ static void load_from_helper(INTERNAL_FUNCTION_PARAMETERS, int mode) dom_document_convert_to_modern(intern->document, lxml_doc); } +static void load_from_string_or_file_helper(INTERNAL_FUNCTION_PARAMETERS, int mode) +{ + const char *source, *override_encoding = NULL; + size_t source_len, override_encoding_len; + zend_long 
options = 0; + if (zend_parse_parameters( + ZEND_NUM_ARGS(), + "s|lp!", + &source, + &source_len, + &options, + &override_encoding, + &override_encoding_len + ) == FAILURE) { + RETURN_THROWS(); + } + + if (!source_len) { + zend_argument_value_error(1, "must not be empty"); + RETURN_THROWS(); + } + + if (ZEND_SIZE_T_INT_OVFL(source_len)) { + zend_argument_value_error(1, "is too long"); + RETURN_THROWS(); + } + + /* See php_libxml_streams_IO_open_wrapper(), apparently this caused issues in the past. */ + if (mode == DOM_LOAD_FILE && strstr(source, "%00")) { + zend_argument_value_error(1, "must not contain percent-encoded NUL bytes"); + RETURN_THROWS(); + } + + if (!check_options_validity(2, options)) { + RETURN_THROWS(); + } + + dom_source_union source_union; + source_union.str = source; + source_union.str_len = source_len; + load_from_helper(return_value, mode, source_union, options, override_encoding, NULL); +} + void dom_document_convert_to_modern(php_libxml_ref_obj *document, xmlDocPtr lxml_doc) { php_dom_private_data *private_data = php_dom_private_data_create(); @@ -245,12 +253,41 @@ void dom_document_convert_to_modern(php_libxml_ref_obj *document, xmlDocPtr lxml PHP_METHOD(Dom_XMLDocument, createFromString) { - load_from_helper(INTERNAL_FUNCTION_PARAM_PASSTHRU, DOM_LOAD_STRING); + load_from_string_or_file_helper(INTERNAL_FUNCTION_PARAM_PASSTHRU, DOM_LOAD_STRING); } PHP_METHOD(Dom_XMLDocument, createFromFile) { - load_from_helper(INTERNAL_FUNCTION_PARAM_PASSTHRU, DOM_LOAD_FILE); + load_from_string_or_file_helper(INTERNAL_FUNCTION_PARAM_PASSTHRU, DOM_LOAD_FILE); +} + +PHP_METHOD(Dom_XMLDocument, createFromStream) +{ + zval *stream_zv; + dom_source_union source_union; + const char *document_uri = NULL, *override_encoding = NULL; + size_t document_uri_len = 0, override_encoding_len = 0; + zend_long options = 0; + if (zend_parse_parameters( + ZEND_NUM_ARGS(), + "r|p!lp!", + &stream_zv, + &document_uri, + &document_uri_len, + &options, + &override_encoding, + 
&override_encoding_len + ) == FAILURE) { + RETURN_THROWS(); + } + + php_stream_from_res(source_union.stream, Z_RES_P(stream_zv)); + + if (!check_options_validity(3, options)) { + RETURN_THROWS(); + } + + load_from_helper(return_value, DOM_LOAD_STREAM, source_union, options, override_encoding, document_uri); } static int php_new_dom_write_smart_str(void *context, const char *buffer, int len) From af26fcffc2403df28305a65ab02ec942fe810bbc Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Sun, 19 Jan 2025 15:00:08 +0100 Subject: [PATCH 4/6] Move common encoding validity checking function to ext-libxml --- ext/libxml/libxml.c | 16 ++++++++++++++++ ext/libxml/php_libxml.h | 1 + ext/xmlreader/php_xmlreader.c | 22 +++------------------- 3 files changed, 20 insertions(+), 19 deletions(-) diff --git a/ext/libxml/libxml.c b/ext/libxml/libxml.c index 94704d09e463e..508925f8c33d4 100644 --- a/ext/libxml/libxml.c +++ b/ext/libxml/libxml.c @@ -1075,6 +1075,22 @@ PHP_LIBXML_API bool php_libxml_uses_internal_errors(void) return xmlStructuredError == php_libxml_structured_error_handler; } +PHP_LIBXML_API bool php_libxml_is_valid_encoding(const char *encoding) +{ + if (!encoding) { + return true; + } + + /* Normally we could use xmlTextReaderConstEncoding() afterwards but libxml2 < 2.12.0 has a bug of course + * where it returns NULL for some valid encodings instead. 
*/ + xmlCharEncodingHandlerPtr handler = xmlFindCharEncodingHandler(encoding); + if (!handler) { + return false; + } + xmlCharEncCloseFunc(handler); + return true; +} + /* {{{ Disable libxml errors and allow user to fetch error information as needed */ PHP_FUNCTION(libxml_use_internal_errors) { diff --git a/ext/libxml/php_libxml.h b/ext/libxml/php_libxml.h index 3314bf6a7b28c..9062d62d3d5e8 100644 --- a/ext/libxml/php_libxml.h +++ b/ext/libxml/php_libxml.h @@ -214,6 +214,7 @@ PHP_LIBXML_API bool php_libxml_disable_entity_loader(bool disable); PHP_LIBXML_API void php_libxml_set_old_ns(xmlDocPtr doc, xmlNsPtr ns); PHP_LIBXML_API php_stream_context *php_libxml_get_stream_context(void); PHP_LIBXML_API bool php_libxml_uses_internal_errors(void); +PHP_LIBXML_API bool php_libxml_is_valid_encoding(const char *encoding); PHP_LIBXML_API xmlChar *php_libxml_attr_value(const xmlAttr *attr, bool *free); diff --git a/ext/xmlreader/php_xmlreader.c b/ext/xmlreader/php_xmlreader.c index 992d60bd2c2ec..09423342de3b5 100644 --- a/ext/xmlreader/php_xmlreader.c +++ b/ext/xmlreader/php_xmlreader.c @@ -881,22 +881,6 @@ PHP_METHOD(XMLReader, next) } /* }}} */ -static bool xmlreader_valid_encoding(const char *encoding) -{ - if (!encoding) { - return true; - } - - /* Normally we could use xmlTextReaderConstEncoding() afterwards but libxml2 < 2.12.0 has a bug of course - * where it returns NULL for some valid encodings instead. */ - xmlCharEncodingHandlerPtr handler = xmlFindCharEncodingHandler(encoding); - if (!handler) { - return false; - } - xmlCharEncCloseFunc(handler); - return true; -} - /* {{{ Sets the URI that the XMLReader will parse. 
*/ static void xml_reader_from_uri(INTERNAL_FUNCTION_PARAMETERS, zend_class_entry *instance_ce, bool use_exceptions) { @@ -925,7 +909,7 @@ static void xml_reader_from_uri(INTERNAL_FUNCTION_PARAMETERS, zend_class_entry * RETURN_THROWS(); } - if (!xmlreader_valid_encoding(encoding)) { + if (!php_libxml_is_valid_encoding(encoding)) { zend_argument_value_error(2, "must be a valid character encoding"); RETURN_THROWS(); } @@ -1013,7 +997,7 @@ PHP_METHOD(XMLReader, fromStream) php_stream_from_res(stream, Z_RES_P(stream_zv)); - if (!xmlreader_valid_encoding(encoding_name)) { + if (!php_libxml_is_valid_encoding(encoding_name)) { zend_argument_value_error(2, "must be a valid character encoding"); RETURN_THROWS(); } @@ -1197,7 +1181,7 @@ static void xml_reader_from_string(INTERNAL_FUNCTION_PARAMETERS, zend_class_entr RETURN_THROWS(); } - if (!xmlreader_valid_encoding(encoding)) { + if (!php_libxml_is_valid_encoding(encoding)) { zend_argument_value_error(2, "must be a valid character encoding"); RETURN_THROWS(); } From 7f27a8115cb62ad3f39d0252846430331778ffe2 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Sun, 19 Jan 2025 15:00:24 +0100 Subject: [PATCH 5/6] Add simplexml_load_stream() --- ext/simplexml/simplexml.c | 107 ++++++++++++------ ext/simplexml/simplexml.stub.php | 3 + ext/simplexml/simplexml_arginfo.h | 14 ++- .../tests/simplexml_load_stream_broken.phpt | 51 +++++++++ .../tests/simplexml_load_stream_errors.phpt | 18 +++ .../tests/simplexml_load_stream_memory.phpt | 35 ++++++ ...exml_load_stream_memory_with_encoding.phpt | 27 +++++ 7 files changed, 219 insertions(+), 36 deletions(-) create mode 100644 ext/simplexml/tests/simplexml_load_stream_broken.phpt create mode 100644 ext/simplexml/tests/simplexml_load_stream_errors.phpt create mode 100644 ext/simplexml/tests/simplexml_load_stream_memory.phpt create mode 100644 ext/simplexml/tests/simplexml_load_stream_memory_with_encoding.phpt diff --git a/ext/simplexml/simplexml.c 
b/ext/simplexml/simplexml.c index 3dcf7ca8fb049..33feb91d55058 100644 --- a/ext/simplexml/simplexml.c +++ b/ext/simplexml/simplexml.c @@ -2183,17 +2183,37 @@ sxe_object_new(zend_class_entry *ce) } /* }}} */ +static void sxe_create_obj_from_doc(zval *return_value, xmlDocPtr docp, zend_class_entry *ce, zend_string *ns, bool isprefix) +{ + if (!docp) { + RETURN_FALSE; + } + + zend_function *fptr_count; + if (!ce) { + ce = ce_SimpleXMLElement; + fptr_count = NULL; + } else { + fptr_count = php_sxe_find_fptr_count(ce); + } + php_sxe_object *sxe = php_sxe_object_new(ce, fptr_count); + sxe->iter.nsprefix = ZSTR_LEN(ns) ? zend_string_copy(ns) : NULL; + sxe->iter.isprefix = isprefix; + php_libxml_increment_doc_ref((php_libxml_node_object *)sxe, docp); + php_libxml_increment_node_ptr((php_libxml_node_object *)sxe, xmlDocGetRootElement(docp), NULL); + + RETURN_OBJ(&sxe->zo); +} + /* {{{ Load a filename and return a simplexml_element object to allow for processing */ PHP_FUNCTION(simplexml_load_file) { - php_sxe_object *sxe; char *filename; size_t filename_len; xmlDocPtr docp; zend_string *ns = zend_empty_string; zend_long options = 0; zend_class_entry *ce= ce_SimpleXMLElement; - zend_function *fptr_count; bool isprefix = 0; if (zend_parse_parameters(ZEND_NUM_ARGS(), "p|C!lSb", &filename, &filename_len, &ce, &options, &ns, &isprefix) == FAILURE) { @@ -2209,37 +2229,70 @@ PHP_FUNCTION(simplexml_load_file) docp = xmlReadFile(filename, NULL, (int)options); PHP_LIBXML_RESTORE_GLOBALS(read_file); - if (!docp) { - RETURN_FALSE; + sxe_create_obj_from_doc(return_value, docp, ce, ns, isprefix); +} +/* }}} */ + +static int sxe_stream_read(void *context, char *buffer, int len) +{ + zend_resource *resource = context; + if (EXPECTED(resource->ptr)) { + php_stream *stream = resource->ptr; + return php_stream_read(stream, buffer, len); } + return -1; +} - if (!ce) { - ce = ce_SimpleXMLElement; - fptr_count = NULL; - } else { - fptr_count = php_sxe_find_fptr_count(ce); 
+PHP_FUNCTION(simplexml_load_stream) +{ + zval *stream_zv; + php_stream *stream; + xmlDocPtr docp; + zend_string *ns = zend_empty_string; + zend_long options = 0; + zend_class_entry *ce = ce_SimpleXMLElement; + bool isprefix = 0; + const char *encoding = NULL; + const char *document_uri = NULL; + size_t encoding_len, document_uri_len; + + if (zend_parse_parameters(ZEND_NUM_ARGS(), "r|p!p!C!lSb", + &stream_zv, &encoding, &encoding_len, &document_uri, &document_uri_len, &ce, &options, &ns, &isprefix) == FAILURE) { + RETURN_THROWS(); } - sxe = php_sxe_object_new(ce, fptr_count); - sxe->iter.nsprefix = ZSTR_LEN(ns) ? zend_string_copy(ns) : NULL; - sxe->iter.isprefix = isprefix; - php_libxml_increment_doc_ref((php_libxml_node_object *)sxe, docp); - php_libxml_increment_node_ptr((php_libxml_node_object *)sxe, xmlDocGetRootElement(docp), NULL); - RETURN_OBJ(&sxe->zo); + php_stream_from_res(stream, Z_RES_P(stream_zv)); + + if (!php_libxml_is_valid_encoding(encoding)) { + zend_argument_value_error(2, "must be a valid character encoding"); + RETURN_THROWS(); + } + + if (ZEND_LONG_EXCEEDS_INT(options)) { + zend_argument_value_error(5, "is too large"); + RETURN_THROWS(); + } + + if (encoding) { + options |= XML_PARSE_IGNORE_ENC; + } + + PHP_LIBXML_SANITIZE_GLOBALS(read_file); + docp = xmlReadIO(sxe_stream_read, NULL, stream->res, document_uri, encoding, (int) options); + PHP_LIBXML_RESTORE_GLOBALS(read_file); + + sxe_create_obj_from_doc(return_value, docp, ce, ns, isprefix); } -/* }}} */ /* {{{ Load a string and return a simplexml_element object to allow for processing */ PHP_FUNCTION(simplexml_load_string) { - php_sxe_object *sxe; char *data; size_t data_len; xmlDocPtr docp; zend_string *ns = zend_empty_string; zend_long options = 0; zend_class_entry *ce= ce_SimpleXMLElement; - zend_function *fptr_count; bool isprefix = 0; if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|C!lSb", &data, &data_len, &ce, &options, &ns, &isprefix) == FAILURE) { @@ -2263,23 +2316,7 @@ 
PHP_FUNCTION(simplexml_load_string) docp = xmlReadMemory(data, (int)data_len, NULL, NULL, (int)options); PHP_LIBXML_RESTORE_GLOBALS(read_memory); - if (!docp) { - RETURN_FALSE; - } - - if (!ce) { - ce = ce_SimpleXMLElement; - fptr_count = NULL; - } else { - fptr_count = php_sxe_find_fptr_count(ce); - } - sxe = php_sxe_object_new(ce, fptr_count); - sxe->iter.nsprefix = ZSTR_LEN(ns) ? zend_string_copy(ns) : NULL; - sxe->iter.isprefix = isprefix; - php_libxml_increment_doc_ref((php_libxml_node_object *)sxe, docp); - php_libxml_increment_node_ptr((php_libxml_node_object *)sxe, xmlDocGetRootElement(docp), NULL); - - RETURN_OBJ(&sxe->zo); + sxe_create_obj_from_doc(return_value, docp, ce, ns, isprefix); } /* }}} */ diff --git a/ext/simplexml/simplexml.stub.php b/ext/simplexml/simplexml.stub.php index 2053fec6fdd2e..7386b4fdc1c1b 100644 --- a/ext/simplexml/simplexml.stub.php +++ b/ext/simplexml/simplexml.stub.php @@ -4,6 +4,9 @@ function simplexml_load_file(string $filename, ?string $class_name = SimpleXMLElement::class, int $options = 0, string $namespace_or_prefix = "", bool $is_prefix = false): SimpleXMLElement|false {} +/** @param resource $stream */ +function simplexml_load_stream($stream, ?string $encoding = null, ?string $document_uri = null, ?string $class_name = SimpleXMLElement::class, int $options = 0, string $namespace_or_prefix = "", bool $is_prefix = false): SimpleXMLElement|false {} + function simplexml_load_string(string $data, ?string $class_name = SimpleXMLElement::class, int $options = 0, string $namespace_or_prefix = "", bool $is_prefix = false): SimpleXMLElement|false {} function simplexml_import_dom(object $node, ?string $class_name = SimpleXMLElement::class): ?SimpleXMLElement {} diff --git a/ext/simplexml/simplexml_arginfo.h b/ext/simplexml/simplexml_arginfo.h index 379307f953a10..5a13186b275af 100644 --- a/ext/simplexml/simplexml_arginfo.h +++ b/ext/simplexml/simplexml_arginfo.h @@ -1,5 +1,5 @@ /* This is a generated file, edit the .stub.php file 
instead. - * Stub hash: 36eac2dee86bcc386c24e2cc14caa7bd3d709e82 */ + * Stub hash: a238d5299e5c2d1cbf10c1270d294809af05d5eb */ ZEND_BEGIN_ARG_WITH_RETURN_OBJ_TYPE_MASK_EX(arginfo_simplexml_load_file, 0, 1, SimpleXMLElement, MAY_BE_FALSE) ZEND_ARG_TYPE_INFO(0, filename, IS_STRING, 0) @@ -9,6 +9,16 @@ ZEND_BEGIN_ARG_WITH_RETURN_OBJ_TYPE_MASK_EX(arginfo_simplexml_load_file, 0, 1, S ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, is_prefix, _IS_BOOL, 0, "false") ZEND_END_ARG_INFO() +ZEND_BEGIN_ARG_WITH_RETURN_OBJ_TYPE_MASK_EX(arginfo_simplexml_load_stream, 0, 1, SimpleXMLElement, MAY_BE_FALSE) + ZEND_ARG_INFO(0, stream) + ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, encoding, IS_STRING, 1, "null") + ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, document_uri, IS_STRING, 1, "null") + ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, class_name, IS_STRING, 1, "SimpleXMLElement::class") + ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, options, IS_LONG, 0, "0") + ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, namespace_or_prefix, IS_STRING, 0, "\"\"") + ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, is_prefix, _IS_BOOL, 0, "false") +ZEND_END_ARG_INFO() + ZEND_BEGIN_ARG_WITH_RETURN_OBJ_TYPE_MASK_EX(arginfo_simplexml_load_string, 0, 1, SimpleXMLElement, MAY_BE_FALSE) ZEND_ARG_TYPE_INFO(0, data, IS_STRING, 0) ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, class_name, IS_STRING, 1, "SimpleXMLElement::class") @@ -101,6 +111,7 @@ ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_OBJ_INFO_EX(arginfo_class_SimpleXMLElement_ ZEND_END_ARG_INFO() ZEND_FUNCTION(simplexml_load_file); +ZEND_FUNCTION(simplexml_load_stream); ZEND_FUNCTION(simplexml_load_string); ZEND_FUNCTION(simplexml_import_dom); ZEND_METHOD(SimpleXMLElement, xpath); @@ -126,6 +137,7 @@ ZEND_METHOD(SimpleXMLElement, getChildren); static const zend_function_entry ext_functions[] = { ZEND_FE(simplexml_load_file, arginfo_simplexml_load_file) + ZEND_FE(simplexml_load_stream, arginfo_simplexml_load_stream) ZEND_FE(simplexml_load_string, arginfo_simplexml_load_string) 
ZEND_FE(simplexml_import_dom, arginfo_simplexml_import_dom) ZEND_FE_END diff --git a/ext/simplexml/tests/simplexml_load_stream_broken.phpt b/ext/simplexml/tests/simplexml_load_stream_broken.phpt new file mode 100644 index 0000000000000..6d2098f63f585 --- /dev/null +++ b/ext/simplexml/tests/simplexml_load_stream_broken.phpt @@ -0,0 +1,51 @@ +--TEST-- +simplexml_load_stream() - from broken stream +--EXTENSIONS-- +simplexml +--FILE-- +first) { + $this->first = false; + return ""; + } + return false; + } + + public function stream_open(string $path, string $mode, int $options, ?string &$opened_path) { + return true; + } + + public function stream_close(): void { + } + + public function stream_eof(): bool { + return !$this->first; + } +} + +stream_wrapper_register("foo", MyStream::class); + +$tmp = fopen("foo://", "r"); +$sxe = simplexml_load_stream($tmp); +fclose($tmp); + +var_dump($sxe); + +?> +--EXPECTF-- +int(8192) +int(8192) +%A +Warning: simplexml_load_stream(): Entity: line 1: parser error : %s + +Warning: simplexml_load_stream(): in %s on line %d + +Warning: simplexml_load_stream():%s^ in %s on line %d +bool(false) diff --git a/ext/simplexml/tests/simplexml_load_stream_errors.phpt b/ext/simplexml/tests/simplexml_load_stream_errors.phpt new file mode 100644 index 0000000000000..b863dfafe0744 --- /dev/null +++ b/ext/simplexml/tests/simplexml_load_stream_errors.phpt @@ -0,0 +1,18 @@ +--TEST-- +simplexml_load_stream() - errors +--EXTENSIONS-- +simplexml +--FILE-- +getMessage(), "\n"; +} +fclose($tmp); + +?> +--EXPECT-- +simplexml_load_stream(): Argument #2 ($encoding) must be a valid character encoding diff --git a/ext/simplexml/tests/simplexml_load_stream_memory.phpt b/ext/simplexml/tests/simplexml_load_stream_memory.phpt new file mode 100644 index 0000000000000..9a43a05495677 --- /dev/null +++ b/ext/simplexml/tests/simplexml_load_stream_memory.phpt @@ -0,0 +1,35 @@ +--TEST-- +simplexml_load_stream() - from memory stream +--EXTENSIONS-- +simplexml +--FILE-- +"); 
+rewind($tmp); +$sxe1 = simplexml_load_stream($tmp); +rewind($tmp); +$sxe2 = simplexml_load_stream($tmp, document_uri: 'http://example.com'); +fclose($tmp); + +var_dump($sxe1, $sxe2); + +?> +--EXPECTF-- +object(SimpleXMLElement)#%d (2) { + ["child1"]=> + object(SimpleXMLElement)#%d (0) { + } + ["child2"]=> + object(SimpleXMLElement)#%d (0) { + } +} +object(SimpleXMLElement)#%d (2) { + ["child1"]=> + object(SimpleXMLElement)#%d (0) { + } + ["child2"]=> + object(SimpleXMLElement)#%d (0) { + } +} diff --git a/ext/simplexml/tests/simplexml_load_stream_memory_with_encoding.phpt b/ext/simplexml/tests/simplexml_load_stream_memory_with_encoding.phpt new file mode 100644 index 0000000000000..eff3159b1059b --- /dev/null +++ b/ext/simplexml/tests/simplexml_load_stream_memory_with_encoding.phpt @@ -0,0 +1,27 @@ +--TEST-- +simplexml_load_stream() - from memory stream with encoding +--EXTENSIONS-- +simplexml +--FILE-- +ééé'); +rewind($tmp); +$sxe1 = simplexml_load_stream($tmp, encoding: 'UTF-8'); +rewind($tmp); +$sxe2 = simplexml_load_stream($tmp); +fclose($tmp); + +var_dump($sxe1, $sxe2); + +?> +--EXPECTF-- +object(SimpleXMLElement)#%d (1) { + [0]=> + string(6) "ééé" +} +object(SimpleXMLElement)#%d (1) { + [0]=> + string(18) "テゥテゥテゥ" +} From 1e55758af38338760a2546653da4ee6b059f1c70 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Thu, 30 Jan 2025 21:12:37 +0100 Subject: [PATCH 6/6] Prototype XSLTProcessor::transformToStream() --- ext/xsl/php_xsl.stub.php | 6 ++ ext/xsl/php_xsl_arginfo.h | 10 ++- ext/xsl/tests/transform_to_stream.phpt | 38 ++++++++++++ .../transform_to_stream_broken_stream.phpt | 52 ++++++++++++++++ ext/xsl/tests/transform_to_stream_errors.phpt | 19 ++++++ ext/xsl/xsltprocessor.c | 61 +++++++++++++++++++ 6 files changed, 185 insertions(+), 1 deletion(-) create mode 100644 ext/xsl/tests/transform_to_stream.phpt create mode 100644 ext/xsl/tests/transform_to_stream_broken_stream.phpt create mode 100644 
ext/xsl/tests/transform_to_stream_errors.phpt diff --git a/ext/xsl/php_xsl.stub.php b/ext/xsl/php_xsl.stub.php index 24da81b7d71b8..a213755bd64ca 100644 --- a/ext/xsl/php_xsl.stub.php +++ b/ext/xsl/php_xsl.stub.php @@ -91,6 +91,12 @@ public function importStylesheet(object $stylesheet): bool {} */ public function transformToDoc(object $document, ?string $returnClass = null): object|false {} + /** + * @param DOMDocument|Dom\Document|SimpleXMLElement $document + * @param resource $stream + */ + public function transformToStream(object $document, $stream, ?string $encoding = null): int {} + /** * @param DOMDocument|Dom\Document|SimpleXMLElement $document * @tentative-return-type diff --git a/ext/xsl/php_xsl_arginfo.h b/ext/xsl/php_xsl_arginfo.h index d040928197f65..74a849a42d40a 100644 --- a/ext/xsl/php_xsl_arginfo.h +++ b/ext/xsl/php_xsl_arginfo.h @@ -1,5 +1,5 @@ /* This is a generated file, edit the .stub.php file instead. - * Stub hash: 5b4ce3f5f7dee60bde803b3c2eb3994777f56914 */ + * Stub hash: dff0596dc4f7ebeb24cab180f8853c9b15c5d065 */ ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_TYPE_INFO_EX(arginfo_class_XSLTProcessor_importStylesheet, 0, 1, _IS_BOOL, 0) ZEND_ARG_TYPE_INFO(0, stylesheet, IS_OBJECT, 0) @@ -10,6 +10,12 @@ ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_TYPE_MASK_EX(arginfo_class_XSLTProcessor_tr ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, returnClass, IS_STRING, 1, "null") ZEND_END_ARG_INFO() +ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_class_XSLTProcessor_transformToStream, 0, 2, IS_LONG, 0) + ZEND_ARG_TYPE_INFO(0, document, IS_OBJECT, 0) + ZEND_ARG_INFO(0, stream) + ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, encoding, IS_STRING, 1, "null") +ZEND_END_ARG_INFO() + ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_TYPE_INFO_EX(arginfo_class_XSLTProcessor_transformToUri, 0, 2, IS_LONG, 0) ZEND_ARG_TYPE_INFO(0, document, IS_OBJECT, 0) ZEND_ARG_TYPE_INFO(0, uri, IS_STRING, 0) @@ -61,6 +67,7 @@ ZEND_END_ARG_INFO() ZEND_METHOD(XSLTProcessor, importStylesheet); 
ZEND_METHOD(XSLTProcessor, transformToDoc); +ZEND_METHOD(XSLTProcessor, transformToStream); ZEND_METHOD(XSLTProcessor, transformToUri); ZEND_METHOD(XSLTProcessor, transformToXml); ZEND_METHOD(XSLTProcessor, setParameter); @@ -76,6 +83,7 @@ ZEND_METHOD(XSLTProcessor, getSecurityPrefs); static const zend_function_entry class_XSLTProcessor_methods[] = { ZEND_ME(XSLTProcessor, importStylesheet, arginfo_class_XSLTProcessor_importStylesheet, ZEND_ACC_PUBLIC) ZEND_ME(XSLTProcessor, transformToDoc, arginfo_class_XSLTProcessor_transformToDoc, ZEND_ACC_PUBLIC) + ZEND_ME(XSLTProcessor, transformToStream, arginfo_class_XSLTProcessor_transformToStream, ZEND_ACC_PUBLIC) ZEND_ME(XSLTProcessor, transformToUri, arginfo_class_XSLTProcessor_transformToUri, ZEND_ACC_PUBLIC) ZEND_ME(XSLTProcessor, transformToXml, arginfo_class_XSLTProcessor_transformToXml, ZEND_ACC_PUBLIC) ZEND_ME(XSLTProcessor, setParameter, arginfo_class_XSLTProcessor_setParameter, ZEND_ACC_PUBLIC) diff --git a/ext/xsl/tests/transform_to_stream.phpt b/ext/xsl/tests/transform_to_stream.phpt new file mode 100644 index 0000000000000..fbac511d281ac --- /dev/null +++ b/ext/xsl/tests/transform_to_stream.phpt @@ -0,0 +1,38 @@ +--TEST-- +XSLTProcessor::transformToStream() function - normal +--EXTENSIONS-- +xsl +--FILE-- +importStylesheet($xsl); + +$stream = fopen('php://output', 'w'); +$written = $proc->transformToStream($dom, $stream); +fclose($stream); + +echo "\n"; +var_dump($written); + +$stream = fopen('php://output', 'w'); +$written = $proc->transformToStream($dom, $stream, 'iso-8859-1'); +fclose($stream); + +echo "\n"; +var_dump($written); +?> +--EXPECT-- + +bar +a1 b1 c1
+a2 c2
+ä3 b3 c3
+ +int(120) + +bar +a1 b1 c1
+a2 c2
+ä3 b3 c3
+ +int(119) diff --git a/ext/xsl/tests/transform_to_stream_broken_stream.phpt b/ext/xsl/tests/transform_to_stream_broken_stream.phpt new file mode 100644 index 0000000000000..1d69502d4b395 --- /dev/null +++ b/ext/xsl/tests/transform_to_stream_broken_stream.phpt @@ -0,0 +1,52 @@ +--TEST-- +XSLTProcessor::transformToStream() function - broken stream +--EXTENSIONS-- +xsl +--FILE-- +first) { + $this->first = false; + var_dump($data); + } + throw new Error("broken"); + } + + public function stream_open(string $path, string $mode, int $options, ?string &$opened_path) { + return true; + } + + public function stream_close(): void { + } + + public function stream_eof(): bool { + return !$this->first; + } +} + +stream_wrapper_register("foo", MyStream::class); + +include("prepare.inc"); +$proc->importStylesheet($xsl); + +$stream = fopen('foo://', 'w'); +stream_set_chunk_size($stream, 4); +$written = $proc->transformToStream($dom, $stream); +fclose($stream); + +echo "\n"; +var_dump($written); +?> +--EXPECTF-- +string(4) "stream_write('transformToStream(Object(DOMDocument), Resource id #%d) +#2 {main} + thrown in %s on line %d diff --git a/ext/xsl/tests/transform_to_stream_errors.phpt b/ext/xsl/tests/transform_to_stream_errors.phpt new file mode 100644 index 0000000000000..a448ed3868a22 --- /dev/null +++ b/ext/xsl/tests/transform_to_stream_errors.phpt @@ -0,0 +1,19 @@ +--TEST-- +XSLTProcessor::transformToStream() function - errors +--EXTENSIONS-- +xsl +--FILE-- +importStylesheet($xsl); + +$stream = fopen('php://output', 'w'); +try { + $proc->transformToStream($dom, $stream, 'nope'); +} catch (ValueError $e) { + echo $e->getMessage(), "\n"; +} +fclose($stream); +?> +--EXPECT-- +XSLTProcessor::transformToStream(): Argument #3 ($encoding) is not a valid document encoding diff --git a/ext/xsl/xsltprocessor.c b/ext/xsl/xsltprocessor.c index ea0f9232aced4..e7fe4cfbafb8f 100644 --- a/ext/xsl/xsltprocessor.c +++ b/ext/xsl/xsltprocessor.c @@ -21,6 +21,7 @@ #include "php.h" #include 
"php_xsl.h" +#include "Zend/zend_exceptions.h" #include #include "ext/libxml/php_libxml.h" #include "ext/dom/namespace_compat.h" @@ -481,6 +482,66 @@ PHP_METHOD(XSLTProcessor, transformToDoc) } /* }}} end XSLTProcessor::transformToDoc */ +static int xsl_stream_write(void *context, const char *buffer, int len) +{ + zend_resource *resource = context; + if (EXPECTED(resource->ptr)) { + php_stream *stream = resource->ptr; + return php_stream_write(stream, buffer, len); + } + return -1; +} + +PHP_METHOD(XSLTProcessor, transformToStream) +{ + zval *docp, *stream_zv; + php_stream *stream; + const char *encoding = NULL; + size_t encoding_len; + if (zend_parse_parameters(ZEND_NUM_ARGS(), "or|p!", &docp, &stream_zv, &encoding, &encoding_len) == FAILURE) { + RETURN_THROWS(); + } + + php_stream_from_res(stream, Z_RES_P(stream_zv)); + + xmlCharEncodingHandlerPtr handler = NULL; + if (encoding) { + handler = xmlFindCharEncodingHandler(encoding); + if (UNEXPECTED(!handler)) { + zend_argument_value_error(3, "is not a valid document encoding"); + RETURN_THROWS(); + } + } + + xsl_object *intern = Z_XSL_P(ZEND_THIS); + xsltStylesheetPtr sheetp = intern->ptr; + + xmlOutputBufferPtr out = xmlOutputBufferCreateIO(xsl_stream_write, NULL, stream->res, handler); + if (UNEXPECTED(!out)) { + zend_throw_error(zend_ce_exception, "Failed to create output buffer"); + RETURN_THROWS(); + } + + xmlDocPtr newdocp = php_xsl_apply_stylesheet(ZEND_THIS, intern, sheetp, docp); + + int ret = -1; + if (newdocp) { + ret = xsltSaveResultTo(out, newdocp, sheetp); + xmlFreeDoc(newdocp); + } + + xmlOutputBufferClose(out); + + if (ret < 0) { + if (!EG(exception)) { + zend_throw_error(zend_ce_exception, "Failed to transform and write document"); + } + RETURN_THROWS(); + } + + RETURN_LONG(ret); +} + /* {{{ */ PHP_METHOD(XSLTProcessor, transformToUri) {