Skip to content

[RFC] Add simplexml_load_stream(), HTMLDocument::createFromStream(), XMLDocument::createFromStream() #17522

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 6 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 30 additions & 14 deletions ext/dom/document.c
Original file line number Diff line number Diff line change
Expand Up @@ -1345,7 +1345,17 @@ const char *dom_get_valid_file_path(const char *source, char *resolved_path, int
}
/* }}} */

xmlDocPtr dom_document_parser(zval *id, dom_load_mode mode, const char *source, size_t source_len, size_t options, xmlCharEncodingHandlerPtr encoding) /* {{{ */
/* Read callback for the libxml2 IO parser context.
 *
 * `context` is the zend_resource backing the PHP stream. Up to `len` bytes are
 * read into `buffer` and the php_stream_read() result is returned; -1 is
 * returned when the resource has no stream pointer attached. */
static int dom_stream_read(void *context, char *buffer, int len)
{
	zend_resource *res = context;

	/* Guard first: without a stream pointer there is nothing to read from. */
	if (!EXPECTED(res->ptr)) {
		return -1;
	}

	php_stream *input = res->ptr;
	return php_stream_read(input, buffer, len);
}

xmlDocPtr dom_document_parser(zval *id, dom_load_mode mode, dom_source_union source, size_t options, xmlCharEncodingHandlerPtr encoding, const char *override_document_uri) /* {{{ */
{
xmlDocPtr ret;
xmlParserCtxtPtr ctxt = NULL;
Expand All @@ -1371,16 +1381,18 @@ xmlDocPtr dom_document_parser(zval *id, dom_load_mode mode, const char *source,
xmlInitParser();

if (mode == DOM_LOAD_FILE) {
if (CHECK_NULL_PATH(source, source_len)) {
if (CHECK_NULL_PATH(source.str, source.str_len)) {
zend_argument_value_error(1, "must not contain any null bytes");
return NULL;
}
const char *file_dest = dom_get_valid_file_path(source, resolved_path, MAXPATHLEN);
const char *file_dest = dom_get_valid_file_path(source.str, resolved_path, MAXPATHLEN);
if (file_dest) {
ctxt = xmlCreateFileParserCtxt(file_dest);
}
} else if (mode == DOM_LOAD_STRING) {
ctxt = xmlCreateMemoryParserCtxt(source.str, source.str_len);
} else {
ctxt = xmlCreateMemoryParserCtxt(source, source_len);
ctxt = xmlCreateIOParserCtxt(NULL, NULL, dom_stream_read, NULL, source.stream->res, XML_CHAR_ENCODING_NONE);
}

if (ctxt == NULL) {
Expand All @@ -1393,7 +1405,7 @@ xmlDocPtr dom_document_parser(zval *id, dom_load_mode mode, const char *source,
}

/* If loading from memory, we need to set the base directory for the document */
if (mode != DOM_LOAD_FILE) {
if (mode == DOM_LOAD_STRING) {
#ifdef HAVE_GETCWD
directory = VCWD_GETCWD(resolved_path, MAXPATHLEN);
#elif defined(HAVE_GETWD)
Expand All @@ -1410,6 +1422,11 @@ xmlDocPtr dom_document_parser(zval *id, dom_load_mode mode, const char *source,
}
ctxt->directory = (char *) xmlCanonicPath((const xmlChar *) resolved_path);
}
} else if (override_document_uri) {
if(ctxt->directory != NULL) {
xmlFree(ctxt->directory);
}
ctxt->directory = (char *) xmlCanonicPath((const xmlChar *) override_document_uri);
}

ctxt->vctxt.error = php_libxml_ctx_error;
Expand Down Expand Up @@ -1507,21 +1524,20 @@ static void php_dom_finish_loading_document(zval *this, zval *return_value, xmlD
RETURN_TRUE;
}

static void dom_parse_document(INTERNAL_FUNCTION_PARAMETERS, int mode)
static void dom_legacy_parse_document(INTERNAL_FUNCTION_PARAMETERS, int mode)
{
char *source;
size_t source_len;
dom_source_union source;
zend_long options = 0;

if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|l", &source, &source_len, &options) == FAILURE) {
if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|l", &source.str, &source.str_len, &options) == FAILURE) {
RETURN_THROWS();
}

if (!source_len) {
if (!source.str_len) {
zend_argument_must_not_be_empty_error(1);
RETURN_THROWS();
}
if (ZEND_SIZE_T_INT_OVFL(source_len)) {
if (ZEND_SIZE_T_INT_OVFL(source.str_len)) {
php_error_docref(NULL, E_WARNING, "Input string is too long");
RETURN_FALSE;
}
Expand All @@ -1530,7 +1546,7 @@ static void dom_parse_document(INTERNAL_FUNCTION_PARAMETERS, int mode)
RETURN_FALSE;
}

xmlDocPtr newdoc = dom_document_parser(ZEND_THIS, mode, source, source_len, options, NULL);
xmlDocPtr newdoc = dom_document_parser(ZEND_THIS, mode, source, options, NULL, NULL);
if (newdoc == DOM_DOCUMENT_MALFORMED) {
newdoc = NULL;
}
Expand All @@ -1542,7 +1558,7 @@ Since: DOM Level 3
*/
PHP_METHOD(DOMDocument, load)
{
dom_parse_document(INTERNAL_FUNCTION_PARAM_PASSTHRU, DOM_LOAD_FILE);
dom_legacy_parse_document(INTERNAL_FUNCTION_PARAM_PASSTHRU, DOM_LOAD_FILE);
}
/* }}} end dom_document_load */

Expand All @@ -1551,7 +1567,7 @@ Since: DOM Level 3
*/
PHP_METHOD(DOMDocument, loadXML)
{
dom_parse_document(INTERNAL_FUNCTION_PARAM_PASSTHRU, DOM_LOAD_STRING);
dom_legacy_parse_document(INTERNAL_FUNCTION_PARAM_PASSTHRU, DOM_LOAD_STRING);
}
/* }}} end dom_document_loadxml */

Expand Down
126 changes: 83 additions & 43 deletions ext/dom/html_document.c
Original file line number Diff line number Diff line change
Expand Up @@ -994,37 +994,22 @@ PHP_METHOD(Dom_HTMLDocument, createFromString)
RETURN_THROWS();
}

PHP_METHOD(Dom_HTMLDocument, createFromFile)
static void dom_html_document_create_from_stream(
zval *return_value,
php_stream *stream,
zend_long options,
const char *override_encoding,
size_t override_encoding_len,
zend_string *opened_path,
const char *filename
)
{
const char *filename, *override_encoding = NULL;
php_dom_private_data *private_data = NULL;
size_t filename_len, override_encoding_len;
zend_long options = 0;
php_stream *stream = NULL;
if (zend_parse_parameters(
ZEND_NUM_ARGS(),
"p|lp!",
&filename,
&filename_len,
&options,
&override_encoding,
&override_encoding_len
) == FAILURE) {
RETURN_THROWS();
}

/* See php_libxml_streams_IO_open_wrapper(), apparently this caused issues in the past. */
if (strstr(filename, "%00")) {
zend_argument_value_error(1, "must not contain percent-encoded NUL bytes");
RETURN_THROWS();
}
ZEND_ASSERT(stream != NULL);

if (!check_options_validity(2, options)) {
RETURN_THROWS();
}
php_dom_private_data *private_data = NULL;

dom_lexbor_libxml2_bridge_application_data application_data;
application_data.input_name = filename;
application_data.input_name = filename ? filename : "Entity";
application_data.current_total_offset = 0;
application_data.html_no_implied = options & HTML_PARSE_NOIMPLIED;
dom_reset_line_column_cache(&application_data.cache_tokenizer);
Expand Down Expand Up @@ -1058,15 +1043,6 @@ PHP_METHOD(Dom_HTMLDocument, createFromFile)
dom_setup_parser_encoding_manually((const lxb_char_t *) buf, encoding_data, &decoding_encoding_ctx, &application_data);
}

zend_string *opened_path = NULL;
stream = php_stream_open_wrapper_ex(filename, "rb", REPORT_ERRORS, &opened_path, php_libxml_get_stream_context());
if (!stream) {
if (!EG(exception)) {
zend_throw_exception_ex(NULL, 0, "Cannot open file '%s'", filename);
}
RETURN_THROWS();
}

/* MIME sniff */
if (should_determine_encoding_implicitly) {
zend_string *charset = php_libxml_sniff_charset_from_stream(stream);
Expand Down Expand Up @@ -1192,12 +1168,6 @@ PHP_METHOD(Dom_HTMLDocument, createFromFile)
lxml_doc->URL = xmlStrdup((const xmlChar *) filename);
}

if (opened_path != NULL) {
zend_string_release_ex(opened_path, false);
}
php_stream_close(stream);
stream = NULL;

dom_object *intern = php_dom_instantiate_object_helper(
return_value,
dom_html_document_class_entry,
Expand All @@ -1216,10 +1186,80 @@ PHP_METHOD(Dom_HTMLDocument, createFromFile)
php_dom_private_data_destroy(private_data);
}
lxb_html_document_destroy(document);
php_stream_close(stream);
}

/* Dom\HTMLDocument::createFromFile(string $path, int $options = 0, ?string $overrideEncoding = null)
 *
 * Validates the arguments, opens $path as a binary read stream and hands the
 * stream to dom_html_document_create_from_stream(). The stream and the
 * resolved path are released here once the helper returns. */
PHP_METHOD(Dom_HTMLDocument, createFromFile)
{
	const char *filename;
	const char *override_encoding = NULL;
	size_t filename_len;
	size_t override_encoding_len;
	zend_long options = 0;

	if (zend_parse_parameters(ZEND_NUM_ARGS(), "p|lp!",
			&filename, &filename_len,
			&options,
			&override_encoding, &override_encoding_len) == FAILURE) {
		RETURN_THROWS();
	}

	/* See php_libxml_streams_IO_open_wrapper(), apparently this caused issues in the past. */
	if (strstr(filename, "%00") != NULL) {
		zend_argument_value_error(1, "must not contain percent-encoded NUL bytes");
		RETURN_THROWS();
	}

	if (!check_options_validity(2, options)) {
		RETURN_THROWS();
	}

	zend_string *opened_path = NULL;
	php_stream *stream = php_stream_open_wrapper_ex(
		filename, "rb", REPORT_ERRORS, &opened_path, php_libxml_get_stream_context()
	);
	if (stream == NULL) {
		/* Only raise our own exception if opening the stream did not already throw. */
		if (EG(exception) == NULL) {
			zend_throw_exception_ex(NULL, 0, "Cannot open file '%s'", filename);
		}
		RETURN_THROWS();
	}

	dom_html_document_create_from_stream(
		return_value, stream, options, override_encoding, override_encoding_len, opened_path, filename
	);

	/* This method opened the stream, so it also cleans up after parsing. */
	if (opened_path) {
		zend_string_release_ex(opened_path, false);
	}
	php_stream_close(stream);
}

/* Dom\HTMLDocument::createFromStream($stream, ?string $documentURI = null, int $options = 0, ?string $overrideEncoding = null)
 *
 * Parses an HTML document from an already-open stream resource supplied by the
 * caller. The stream is not closed here. $documentURI, when given, is passed
 * to the shared helper as the input name. */
PHP_METHOD(Dom_HTMLDocument, createFromStream)
{
	php_stream *stream;
	zval *stream_zv;
	const char *document_uri = NULL;
	const char *override_encoding = NULL;
	/* Initialised so that no indeterminate value is passed on when the optional
	 * string arguments are omitted or null. */
	size_t override_encoding_len = 0, document_uri_len = 0;
	zend_long options = 0;
	if (zend_parse_parameters(
		ZEND_NUM_ARGS(),
		"r|p!lp!",
		&stream_zv,
		&document_uri,
		&document_uri_len,
		&options,
		&override_encoding,
		&override_encoding_len
	) == FAILURE) {
		RETURN_THROWS();
	}

	php_stream_from_res(stream, Z_RES_P(stream_zv));

	/* The shared helper does not validate options itself (createFromFile does
	 * so before calling it), so do the same here. $options is argument 3. */
	if (!check_options_validity(3, options)) {
		RETURN_THROWS();
	}

	dom_html_document_create_from_stream(
		return_value, stream, options, override_encoding, override_encoding_len, NULL, document_uri
	);
}

static zend_result dom_write_output_smart_str(void *ctx, const char *buf, size_t size)
Expand Down
15 changes: 12 additions & 3 deletions ext/dom/php_dom.h
Original file line number Diff line number Diff line change
Expand Up @@ -179,13 +179,22 @@ void dom_set_document_ref_pointers(xmlNodePtr node, php_libxml_ref_obj *document
void dom_set_document_ref_pointers_attr(xmlAttrPtr attr, php_libxml_ref_obj *document);

typedef enum {
DOM_LOAD_STRING = 0,
DOM_LOAD_FILE = 1,
DOM_LOAD_STRING,
DOM_LOAD_FILE,
DOM_LOAD_STREAM,
} dom_load_mode;

/* Input handed to dom_document_parser(); the load mode selects which member is read. */
typedef union {
/* File path (DOM_LOAD_FILE) or in-memory document (DOM_LOAD_STRING), with its length. */
struct {
const char *str;
size_t str_len;
};
/* Source stream, used when the mode is neither file nor string. */
php_stream *stream;
} dom_source_union;

#define DOM_DOCUMENT_MALFORMED ((xmlDocPtr) -1)

xmlDocPtr dom_document_parser(zval *id, dom_load_mode mode, const char *source, size_t source_len, size_t options, xmlCharEncodingHandlerPtr encoding);
xmlDocPtr dom_document_parser(zval *id, dom_load_mode mode, dom_source_union source, size_t options, xmlCharEncodingHandlerPtr encoding, const char *override_document_uri);

/* parentnode */
void dom_parent_node_prepend(dom_object *context, zval *nodes, uint32_t nodesc);
Expand Down
6 changes: 6 additions & 0 deletions ext/dom/php_dom.stub.php
Original file line number Diff line number Diff line change
Expand Up @@ -2042,6 +2042,9 @@ public static function createEmpty(string $encoding = "UTF-8"): HTMLDocument {}

public static function createFromFile(string $path, int $options = 0, ?string $overrideEncoding = null): HTMLDocument {}

/** @param resource $stream */
public static function createFromStream($stream, ?string $documentURI = null, int $options = 0, ?string $overrideEncoding = null): HTMLDocument {}

public static function createFromString(string $source, int $options = 0, ?string $overrideEncoding = null): HTMLDocument {}

/** @implementation-alias Dom\XMLDocument::saveXml */
Expand All @@ -2065,6 +2068,9 @@ public static function createEmpty(string $version = "1.0", string $encoding = "

public static function createFromFile(string $path, int $options = 0, ?string $overrideEncoding = null): XMLDocument {}

/** @param resource $stream */
public static function createFromStream($stream, ?string $documentURI = null, int $options = 0, ?string $overrideEncoding = null): XMLDocument {}

public static function createFromString(string $source, int $options = 0, ?string $overrideEncoding = null): XMLDocument {}

/**
Expand Down
20 changes: 19 additions & 1 deletion ext/dom/php_dom_arginfo.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading