From c012490371fc84dc81e23dfa394310b2758f1fbd Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Wed, 10 Apr 2024 17:22:04 +0200 Subject: [PATCH 1/6] Factor out reading an attribute value --- ext/dom/attr.c | 30 ++++++++++++++++++++++++++++++ ext/dom/php_dom.c | 23 ++++++----------------- ext/dom/php_dom.h | 2 ++ 3 files changed, 38 insertions(+), 17 deletions(-) diff --git a/ext/dom/attr.c b/ext/dom/attr.c index e48d124976c25..746ab4efa3604 100644 --- a/ext/dom/attr.c +++ b/ext/dom/attr.c @@ -199,4 +199,34 @@ PHP_METHOD(DOMAttr, isId) } /* }}} end dom_attr_is_id */ +xmlChar *dom_attr_value(const xmlAttr *attr, bool *free) +{ + /* For attributes we can have an optimized fast-path. + * This fast-path is only possible in the (common) case where the attribute + * has a single text child. Note that if the child or the content is NULL, this + * is equivalent to not having content (i.e. the attribute has the empty string as value). */ + + *free = false; + + if (attr->children == NULL) { + return BAD_CAST ""; + } + + if (attr->children->type == XML_TEXT_NODE && attr->children->next == NULL) { + if (attr->children->content == NULL) { + return BAD_CAST ""; + } else { + return attr->children->content; + } + } + + xmlChar *value = xmlNodeGetContent((const xmlNode *) attr); + if (UNEXPECTED(value == NULL)) { + return BAD_CAST ""; + } + + *free = true; + return value; +} + #endif diff --git a/ext/dom/php_dom.c b/ext/dom/php_dom.c index dce9a0fb490c8..d7cb423cd78cc 100644 --- a/ext/dom/php_dom.c +++ b/ext/dom/php_dom.c @@ -2273,24 +2273,13 @@ void php_dom_get_content_into_zval(const xmlNode *nodep, zval *return_value, boo } case XML_ATTRIBUTE_NODE: { - /* For attributes we can also have an optimized fast-path. - * This fast-path is only possible in the (common) case where the attribute - * has a single text child. Note that if the child or the content is NULL, this - * is equivalent to not having content (i.e. the attribute has the empty string as value). */ - - if (nodep->children == NULL) { - RETURN_EMPTY_STRING(); - } - - if (nodep->children->type == XML_TEXT_NODE && nodep->children->next == NULL) { - if (nodep->children->content == NULL) { - RETURN_EMPTY_STRING(); - } else { - RETURN_STRING((const char *) nodep->children->content); - } + bool free; + xmlChar *value = dom_attr_value((const xmlAttr *) nodep, &free); + RETURN_STRING_FAST((const char *) value); + if (free) { + xmlFree(value); } - - ZEND_FALLTHROUGH; + return; } default: { diff --git a/ext/dom/php_dom.h b/ext/dom/php_dom.h index e64695f07b1f1..b37f9ad0e8a67 100644 --- a/ext/dom/php_dom.h +++ b/ext/dom/php_dom.h @@ -174,6 +174,8 @@ void dom_document_convert_to_modern(php_libxml_ref_obj *document, xmlDocPtr lxml dom_object *php_dom_instantiate_object_helper(zval *return_value, zend_class_entry *ce, xmlNodePtr obj, dom_object *parent); xmlDocPtr php_dom_create_html_doc(void); +xmlChar *dom_attr_value(const xmlAttr *attr, bool *free); + typedef enum { DOM_LOAD_STRING = 0, DOM_LOAD_FILE = 1, From 66ea7eba0e49dcca3c97acaeabdb3148909e3aa7 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Wed, 10 Apr 2024 17:22:21 +0200 Subject: [PATCH 2/6] Implement HTMLCollection::namedItem() --- ext/dom/attr.c | 11 +++ ext/dom/config.m4 | 2 +- ext/dom/config.w32 | 2 +- ext/dom/html_collection.c | 74 +++++++++++++++++++ ext/dom/php_dom.h | 1 + ext/dom/php_dom.stub.php | 2 +- ext/dom/php_dom_arginfo.h | 8 +- .../HTMLCollection_named_reads.phpt | 41 ++++++++++ 8 files changed, 137 insertions(+), 4 deletions(-) create mode 100644 ext/dom/html_collection.c create mode 100644 ext/dom/tests/modern/html/interactions/HTMLCollection_named_reads.phpt diff --git a/ext/dom/attr.c b/ext/dom/attr.c index 746ab4efa3604..24c92e2a33ab1 100644 --- a/ext/dom/attr.c +++ b/ext/dom/attr.c @@ -229,4 +229,15 @@ xmlChar *dom_attr_value(const xmlAttr *attr, bool *free) return value; } +bool dom_compare_value(const xmlAttr *attr, const xmlChar *value) +{ + bool free; + xmlChar *attr_value = dom_attr_value(attr, &free); + bool result = xmlStrEqual(attr_value, value); + if (free) { + xmlFree(attr_value); + } + return result; +} + #endif diff --git a/ext/dom/config.m4 b/ext/dom/config.m4 index b3a92287b3f1c..4dcde6105a583 100644 --- a/ext/dom/config.m4 +++ b/ext/dom/config.m4 @@ -32,7 +32,7 @@ if test "$PHP_DOM" != "no"; then documentfragment.c domimplementation.c \ element.c node.c characterdata.c \ documenttype.c entity.c \ - nodelist.c text.c comment.c \ + nodelist.c html_collection.c text.c comment.c \ entityreference.c \ notation.c xpath.c dom_iterators.c \ namednodemap.c xpath_callbacks.c \ diff --git a/ext/dom/config.w32 b/ext/dom/config.w32 index a70d226d0fa31..1a5d33bf7ca4f 100644 --- a/ext/dom/config.w32 +++ b/ext/dom/config.w32 @@ -12,7 +12,7 @@ if (PHP_DOM == "yes") { domexception.c parentnode.c processinginstruction.c \ cdatasection.c documentfragment.c domimplementation.c element.c \ node.c characterdata.c documenttype.c \ - entity.c nodelist.c text.c comment.c \ + entity.c nodelist.c html_collection.c text.c comment.c \ entityreference.c \ notation.c xpath.c dom_iterators.c \ namednodemap.c xpath_callbacks.c", null, "-Iext/dom/lexbor"); diff --git a/ext/dom/html_collection.c b/ext/dom/html_collection.c new file mode 100644 index 0000000000000..dfcc304e8c7ee --- /dev/null +++ b/ext/dom/html_collection.c @@ -0,0 +1,74 @@ +/* + +----------------------------------------------------------------------+ + | Copyright (c) The PHP Group | + +----------------------------------------------------------------------+ + | This source file is subject to version 3.01 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available through the world-wide-web at the following url: | + | https://www.php.net/license/3_01.txt | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. | + +----------------------------------------------------------------------+ + | Authors: Niels Dossche | + +----------------------------------------------------------------------+ +*/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "php.h" +#if defined(HAVE_LIBXML) && defined(HAVE_DOM) +#include "php_dom.h" +#include "namespace_compat.h" + +/* https://dom.spec.whatwg.org/#dom-htmlcollection-nameditem-key */ +PHP_METHOD(DOM_HTMLCollection, namedItem) +{ + zend_string *key; + ZEND_PARSE_PARAMETERS_START(1, 1) + Z_PARAM_PATH_STR(key) + ZEND_PARSE_PARAMETERS_END(); + + /* 1. If key is the empty string, return null. */ + if (ZSTR_LEN(key) == 0) { + RETURN_NULL(); + } + + dom_object *intern = Z_DOMOBJ_P(ZEND_THIS); + dom_nnodemap_object *objmap = intern->ptr; + + /* 2. Return the first element in the collection for which at least one of the following is true: */ + xmlNodePtr basep = dom_object_get_node(objmap->baseobj); + if (basep != NULL) { + int cur = 0; + int next = cur; + xmlNodePtr candidate = basep->children; + while (candidate != NULL) { + candidate = dom_get_elements_by_tag_name_ns_raw(basep, candidate, objmap->ns, objmap->local, objmap->local_lower, &cur, next); + if (candidate == NULL) { + break; + } + + xmlAttrPtr attr; + + /* it has an ID which is key; */ + if ((attr = xmlHasNsProp(candidate, BAD_CAST "id", NULL)) != NULL && dom_compare_value(attr, BAD_CAST ZSTR_VAL(key))) { + DOM_RET_OBJ(candidate, objmap->baseobj); + return; + } + /* it is in the HTML namespace and has a name attribute whose value is key; */ + else if (php_dom_ns_is_fast(candidate, php_dom_ns_is_html_magic_token)) { + if ((attr = xmlHasNsProp(candidate, BAD_CAST "name", NULL)) != NULL && dom_compare_value(attr, BAD_CAST ZSTR_VAL(key))) { + DOM_RET_OBJ(candidate, objmap->baseobj); + return; + } + } + + next = cur + 1; + } + } +} + +#endif diff --git a/ext/dom/php_dom.h b/ext/dom/php_dom.h index b37f9ad0e8a67..6435783f981b1 100644 --- a/ext/dom/php_dom.h +++ b/ext/dom/php_dom.h @@ -175,6 +175,7 @@ dom_object *php_dom_instantiate_object_helper(zval *return_value, zend_class_ent xmlDocPtr php_dom_create_html_doc(void); xmlChar *dom_attr_value(const xmlAttr *attr, bool *free); +bool dom_compare_value(const xmlAttr *attr, const xmlChar *value); typedef enum { DOM_LOAD_STRING = 0, diff --git a/ext/dom/php_dom.stub.php b/ext/dom/php_dom.stub.php index 196a858817572..d744a94349727 100644 --- a/ext/dom/php_dom.stub.php +++ b/ext/dom/php_dom.stub.php @@ -1279,7 +1279,7 @@ class HTMLCollection implements \IteratorAggregate, \Countable /** @implementation-alias DOMNodeList::item */ public function item(int $index): ?Element {} - /* TODO: implement namedItem */ + public function namedItem(string $key): ?Element {} /** @implementation-alias DOMNodeList::count */ public function count(): int {} diff --git a/ext/dom/php_dom_arginfo.h b/ext/dom/php_dom_arginfo.h index a386d5217dc9b..5e759686bc6c3 100644 --- a/ext/dom/php_dom_arginfo.h +++ b/ext/dom/php_dom_arginfo.h @@ -1,5 +1,5 @@ /* This is a generated file, edit the .stub.php file instead. - * Stub hash: f441c789fdce91e8fc71f450b294c11059999af1 */ + * Stub hash: 37a1c811bfc8c611d686f0842d06fc327b54511f */ ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(arginfo_dom_import_simplexml, 0, 1, DOMElement, 0) ZEND_ARG_TYPE_INFO(0, node, IS_OBJECT, 0) @@ -721,6 +721,10 @@ ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(arginfo_class_DOM_HTMLCollection_item, 0, ZEND_ARG_TYPE_INFO(0, index, IS_LONG, 0) ZEND_END_ARG_INFO() +ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(arginfo_class_DOM_HTMLCollection_namedItem, 0, 1, DOM\\Element, 1) + ZEND_ARG_TYPE_INFO(0, key, IS_STRING, 0) +ZEND_END_ARG_INFO() + #define arginfo_class_DOM_HTMLCollection_count arginfo_class_DOM_Node_getLineNo #define arginfo_class_DOM_HTMLCollection_getIterator arginfo_class_DOMNodeList_getIterator @@ -1256,6 +1260,7 @@ ZEND_METHOD(DOM_Node, appendChild); ZEND_METHOD(DOM_Node, replaceChild); ZEND_METHOD(DOM_Node, removeChild); ZEND_METHOD(DOM_Node, getNodePath); +ZEND_METHOD(DOM_HTMLCollection, namedItem); ZEND_METHOD(DOM_Element, removeAttribute); ZEND_METHOD(DOM_Element, setAttributeNodeNS); ZEND_METHOD(DOM_Element, removeAttributeNode); @@ -1620,6 +1625,7 @@ static const zend_function_entry class_DOM_DTDNamedNodeMap_methods[] = { static const zend_function_entry class_DOM_HTMLCollection_methods[] = { ZEND_RAW_FENTRY("item", zim_DOMNodeList_item, arginfo_class_DOM_HTMLCollection_item, ZEND_ACC_PUBLIC, NULL, NULL) + ZEND_ME(DOM_HTMLCollection, namedItem, arginfo_class_DOM_HTMLCollection_namedItem, ZEND_ACC_PUBLIC) ZEND_RAW_FENTRY("count", zim_DOMNodeList_count, arginfo_class_DOM_HTMLCollection_count, ZEND_ACC_PUBLIC, NULL, NULL) ZEND_RAW_FENTRY("getIterator", zim_DOMNodeList_getIterator, arginfo_class_DOM_HTMLCollection_getIterator, ZEND_ACC_PUBLIC, NULL, NULL) ZEND_FE_END diff --git a/ext/dom/tests/modern/html/interactions/HTMLCollection_named_reads.phpt b/ext/dom/tests/modern/html/interactions/HTMLCollection_named_reads.phpt new file mode 100644 index 0000000000000..708f618d1a95a --- /dev/null +++ b/ext/dom/tests/modern/html/interactions/HTMLCollection_named_reads.phpt @@ -0,0 +1,41 @@ +--TEST-- +HTMLCollection::namedItem() and dimension handling for named accesses +--EXTENSIONS-- +dom +--FILE-- + +]> + + 1 + 2 + 2 with entity + 3 + 4 + 5 + without html ns + with html ns + +XML; + +$dom = DOM\XMLDocument::createFromString($xml); +var_dump($dom->getElementsByTagName('node')->namedItem('foo')?->textContent); +var_dump($dom->getElementsByTagName('node')->namedItem('')?->textContent); +var_dump($dom->getElementsByTagName('node')->namedItem('does not exist')?->textContent); +var_dump($dom->getElementsByTagName('node')->namedItem('wrong')?->textContent); +var_dump($dom->getElementsByTagName('node')->namedItem('bar')?->textContent); +var_dump($dom->getElementsByTagName('x')->namedItem('foo')?->textContent); +var_dump($dom->getElementsByTagName('x')->namedItem('footest')?->textContent); + +?> +--EXPECT-- +string(1) "5" +NULL +NULL +string(1) "4" +string(12) "with html ns" +string(1) "2" +string(13) "2 with entity" From d9b4663773857e7f642ab54f12c9ad1050e2b5b9 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Wed, 10 Apr 2024 17:35:58 +0200 Subject: [PATCH 3/6] Move node list dimension handling to a separate file --- ext/dom/nodelist.c | 52 ++++++++++++++++++++++++++++++++++++++++++++ ext/dom/php_dom.c | 54 ---------------------------------------------- ext/dom/php_dom.h | 3 +++ 3 files changed, 55 insertions(+), 54 deletions(-) diff --git a/ext/dom/nodelist.c b/ext/dom/nodelist.c index 0a0ab32e5c20e..79a5b1753c458 100644 --- a/ext/dom/nodelist.c +++ b/ext/dom/nodelist.c @@ -249,4 +249,56 @@ ZEND_METHOD(DOMNodeList, getIterator) zend_create_internal_iterator_zval(return_value, ZEND_THIS); } +static zend_long dom_modern_nodelist_get_index(zval *offset, bool *failed) +{ + zend_ulong lval; + ZVAL_DEREF(offset); + if (Z_TYPE_P(offset) == IS_LONG) { + *failed = false; + return Z_LVAL_P(offset); + } else if (Z_TYPE_P(offset) == IS_DOUBLE) { + *failed = false; + return zend_dval_to_lval_safe(Z_DVAL_P(offset)); + } else if (Z_TYPE_P(offset) == IS_STRING && ZEND_HANDLE_NUMERIC(Z_STR_P(offset), lval)) { + *failed = false; + return (zend_long) lval; + } else { + *failed = true; + return 0; + } +} + +zval *dom_modern_nodelist_read_dimension(zend_object *object, zval *offset, int type, zval *rv) +{ + if (UNEXPECTED(!offset)) { + zend_throw_error(NULL, "Cannot append to %s", ZSTR_VAL(object->ce->name)); + return NULL; + } + + bool failed; + zend_long lval = dom_modern_nodelist_get_index(offset, &failed); + if (UNEXPECTED(failed)) { + zend_illegal_container_offset(object->ce->name, offset, type); + return NULL; + } + + php_dom_nodelist_get_item_into_zval(php_dom_obj_from_obj(object)->ptr, lval, rv); + return rv; +} + +int dom_modern_nodelist_has_dimension(zend_object *object, zval *member, int check_empty) +{ + /* If it exists, it cannot be empty because nodes aren't empty. */ + ZEND_IGNORE_VALUE(check_empty); + + bool failed; + zend_long lval = dom_modern_nodelist_get_index(member, &failed); + if (UNEXPECTED(failed)) { + zend_illegal_container_offset(object->ce->name, member, BP_VAR_IS); + return 0; + } + + return lval >= 0 && lval < php_dom_get_nodelist_length(php_dom_obj_from_obj(object)); +} + #endif diff --git a/ext/dom/php_dom.c b/ext/dom/php_dom.c index d7cb423cd78cc..acf1b5a1e9294 100644 --- a/ext/dom/php_dom.c +++ b/ext/dom/php_dom.c @@ -656,8 +656,6 @@ static zval *dom_nodemap_read_dimension(zend_object *object, zval *offset, int t static int dom_nodemap_has_dimension(zend_object *object, zval *member, int check_empty); static zval *dom_modern_nodemap_read_dimension(zend_object *object, zval *offset, int type, zval *rv); static int dom_modern_nodemap_has_dimension(zend_object *object, zval *member, int check_empty); -static zval *dom_modern_nodelist_read_dimension(zend_object *object, zval *offset, int type, zval *rv); -static int dom_modern_nodelist_has_dimension(zend_object *object, zval *member, int check_empty); static zend_object *dom_objects_store_clone_obj(zend_object *zobject); #ifdef LIBXML_XPATH_ENABLED @@ -2193,58 +2191,6 @@ static int dom_nodelist_has_dimension(zend_object *object, zval *member, int che return offset >= 0 && offset < php_dom_get_nodelist_length(php_dom_obj_from_obj(object)); } -static zend_long dom_modern_nodelist_get_index(zval *offset, bool *failed) -{ - zend_ulong lval; - ZVAL_DEREF(offset); - if (Z_TYPE_P(offset) == IS_LONG) { - *failed = false; - return Z_LVAL_P(offset); - } else if (Z_TYPE_P(offset) == IS_DOUBLE) { - *failed = false; - return zend_dval_to_lval_safe(Z_DVAL_P(offset)); - } else if (Z_TYPE_P(offset) == IS_STRING && ZEND_HANDLE_NUMERIC(Z_STR_P(offset), lval)) { - *failed = false; - return (zend_long) lval; - } else { - *failed = true; - return 0; - } -} - -static zval *dom_modern_nodelist_read_dimension(zend_object *object, zval *offset, int type, zval *rv) -{ - if (UNEXPECTED(!offset)) { - zend_throw_error(NULL, "Cannot append to %s", ZSTR_VAL(object->ce->name)); - return NULL; - } - - bool failed; - zend_long lval = dom_modern_nodelist_get_index(offset, &failed); - if (UNEXPECTED(failed)) { - zend_illegal_container_offset(object->ce->name, offset, type); - return NULL; - } - - php_dom_nodelist_get_item_into_zval(php_dom_obj_from_obj(object)->ptr, lval, rv); - return rv; -} - -static int dom_modern_nodelist_has_dimension(zend_object *object, zval *member, int check_empty) -{ - /* If it exists, it cannot be empty because nodes aren't empty. */ - ZEND_IGNORE_VALUE(check_empty); - - bool failed; - zend_long lval = dom_modern_nodelist_get_index(member, &failed); - if (UNEXPECTED(failed)) { - zend_illegal_container_offset(object->ce->name, member, BP_VAR_IS); - return 0; - } - - return lval >= 0 && lval < php_dom_get_nodelist_length(php_dom_obj_from_obj(object)); -} - void dom_remove_all_children(xmlNodePtr nodep) { if (nodep->children) { diff --git a/ext/dom/php_dom.h b/ext/dom/php_dom.h index 6435783f981b1..54e11f6d47b22 100644 --- a/ext/dom/php_dom.h +++ b/ext/dom/php_dom.h @@ -177,6 +177,9 @@ xmlDocPtr php_dom_create_html_doc(void); xmlChar *dom_attr_value(const xmlAttr *attr, bool *free); bool dom_compare_value(const xmlAttr *attr, const xmlChar *value); +zval *dom_modern_nodelist_read_dimension(zend_object *object, zval *offset, int type, zval *rv); +int dom_modern_nodelist_has_dimension(zend_object *object, zval *member, int check_empty); + typedef enum { DOM_LOAD_STRING = 0, DOM_LOAD_FILE = 1, From 28b2da5f54da371b3262e04ba8043bee58e32f0d Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Wed, 10 Apr 2024 18:15:57 +0200 Subject: [PATCH 4/6] Refactor dom_modern_nodelist_get_index() --- ext/dom/nodelist.c | 59 +++++++++++++++++++++++++++++++--------------- 1 file changed, 40 insertions(+), 19 deletions(-) diff --git a/ext/dom/nodelist.c b/ext/dom/nodelist.c index 79a5b1753c458..d212d6a79ffdc 100644 --- a/ext/dom/nodelist.c +++ b/ext/dom/nodelist.c @@ -249,23 +249,46 @@ ZEND_METHOD(DOMNodeList, getIterator) zend_create_internal_iterator_zval(return_value, ZEND_THIS); } -static zend_long dom_modern_nodelist_get_index(zval *offset, bool *failed) +enum dom_nodelist_dimension_index_type { + DOM_NODELIST_DIM_ILLEGAL, + DOM_NODELIST_DIM_STRING, + DOM_NODELIST_DIM_LONG, +}; + +typedef struct _dom_nodelist_dimension_index { + union { + zend_long lval; + zend_string *str; + }; + enum dom_nodelist_dimension_index_type type; +} dom_nodelist_dimension_index; + +static dom_nodelist_dimension_index dom_modern_nodelist_get_index(zval *offset) { - zend_ulong lval; + dom_nodelist_dimension_index ret; + ZVAL_DEREF(offset); + if (Z_TYPE_P(offset) == IS_LONG) { - *failed = false; - return Z_LVAL_P(offset); + ret.type = DOM_NODELIST_DIM_LONG; + ret.lval = Z_LVAL_P(offset); } else if (Z_TYPE_P(offset) == IS_DOUBLE) { - *failed = false; - return zend_dval_to_lval_safe(Z_DVAL_P(offset)); - } else if (Z_TYPE_P(offset) == IS_STRING && ZEND_HANDLE_NUMERIC(Z_STR_P(offset), lval)) { - *failed = false; - return (zend_long) lval; + ret.type = DOM_NODELIST_DIM_LONG; + ret.lval = zend_dval_to_lval_safe(Z_DVAL_P(offset)); + } else if (Z_TYPE_P(offset) == IS_STRING) { + zend_ulong lval; + if (ZEND_HANDLE_NUMERIC(Z_STR_P(offset), lval)) { + ret.type = DOM_NODELIST_DIM_LONG; + ret.lval = (zend_long) lval; + } else { + ret.type = DOM_NODELIST_DIM_STRING; + ret.str = Z_STR_P(offset); + } } else { - *failed = true; - return 0; + ret.type = DOM_NODELIST_DIM_ILLEGAL; } + + return ret; } zval *dom_modern_nodelist_read_dimension(zend_object *object, zval *offset, int type, zval *rv) @@ -275,14 +298,13 @@ zval *dom_modern_nodelist_read_dimension(zend_object *object, zval *offset, int return NULL; } - bool failed; - zend_long lval = dom_modern_nodelist_get_index(offset, &failed); - if (UNEXPECTED(failed)) { + dom_nodelist_dimension_index index = dom_modern_nodelist_get_index(offset); + if (UNEXPECTED(index.type == DOM_NODELIST_DIM_ILLEGAL || index.type == DOM_NODELIST_DIM_STRING)) { zend_illegal_container_offset(object->ce->name, offset, type); return NULL; } - php_dom_nodelist_get_item_into_zval(php_dom_obj_from_obj(object)->ptr, lval, rv); + php_dom_nodelist_get_item_into_zval(php_dom_obj_from_obj(object)->ptr, index.lval, rv); return rv; } @@ -291,14 +313,13 @@ int dom_modern_nodelist_has_dimension(zend_object *object, zval *member, int che /* If it exists, it cannot be empty because nodes aren't empty. */ ZEND_IGNORE_VALUE(check_empty); - bool failed; - zend_long lval = dom_modern_nodelist_get_index(member, &failed); - if (UNEXPECTED(failed)) { + dom_nodelist_dimension_index index = dom_modern_nodelist_get_index(member); + if (UNEXPECTED(index.type == DOM_NODELIST_DIM_ILLEGAL || index.type == DOM_NODELIST_DIM_STRING)) { zend_illegal_container_offset(object->ce->name, member, BP_VAR_IS); return 0; } - return lval >= 0 && lval < php_dom_get_nodelist_length(php_dom_obj_from_obj(object)); + return index.lval >= 0 && index.lval < php_dom_get_nodelist_length(php_dom_obj_from_obj(object)); } #endif From 459d4480625cd44b95ee75a896936494ba27b4a6 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Wed, 10 Apr 2024 18:21:39 +0200 Subject: [PATCH 5/6] Split off nodelist header components to nodelist.h --- ext/dom/nodelist.c | 17 ++--------------- ext/dom/nodelist.h | 40 ++++++++++++++++++++++++++++++++++++++++ ext/dom/php_dom.c | 1 + ext/dom/php_dom.h | 5 ----- 4 files changed, 43 insertions(+), 20 deletions(-) create mode 100644 ext/dom/nodelist.h diff --git a/ext/dom/nodelist.c b/ext/dom/nodelist.c index d212d6a79ffdc..beda8939e5531 100644 --- a/ext/dom/nodelist.c +++ b/ext/dom/nodelist.c @@ -22,6 +22,7 @@ #include "php.h" #if defined(HAVE_LIBXML) && defined(HAVE_DOM) #include "php_dom.h" +#include "nodelist.h" #include "zend_interfaces.h" /* @@ -249,21 +250,7 @@ ZEND_METHOD(DOMNodeList, getIterator) zend_create_internal_iterator_zval(return_value, ZEND_THIS); } -enum dom_nodelist_dimension_index_type { - DOM_NODELIST_DIM_ILLEGAL, - DOM_NODELIST_DIM_STRING, - DOM_NODELIST_DIM_LONG, -}; - -typedef struct _dom_nodelist_dimension_index { - union { - zend_long lval; - zend_string *str; - }; - enum dom_nodelist_dimension_index_type type; -} dom_nodelist_dimension_index; - -static dom_nodelist_dimension_index dom_modern_nodelist_get_index(zval *offset) +dom_nodelist_dimension_index dom_modern_nodelist_get_index(zval *offset) { dom_nodelist_dimension_index ret; diff --git a/ext/dom/nodelist.h b/ext/dom/nodelist.h new file mode 100644 index 0000000000000..af76e34e41dcc --- /dev/null +++ b/ext/dom/nodelist.h @@ -0,0 +1,40 @@ +/* + +----------------------------------------------------------------------+ + | Copyright (c) The PHP Group | + +----------------------------------------------------------------------+ + | This source file is subject to version 3.01 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available through the world-wide-web at the following url: | + | https://www.php.net/license/3_01.txt | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. | + +----------------------------------------------------------------------+ + | Authors: Niels Dossche | + +----------------------------------------------------------------------+ +*/ + +#ifndef PHP_NODELIST_H +#define PHP_NODELIST_H + +enum dom_nodelist_dimension_index_type { + DOM_NODELIST_DIM_ILLEGAL, + DOM_NODELIST_DIM_STRING, + DOM_NODELIST_DIM_LONG, +}; + +typedef struct _dom_nodelist_dimension_index { + union { + zend_long lval; + zend_string *str; + }; + enum dom_nodelist_dimension_index_type type; +} dom_nodelist_dimension_index; + +void php_dom_nodelist_get_item_into_zval(dom_nnodemap_object *objmap, zend_long index, zval *return_value); +int php_dom_get_nodelist_length(dom_object *obj); +dom_nodelist_dimension_index dom_modern_nodelist_get_index(zval *offset); +zval *dom_modern_nodelist_read_dimension(zend_object *object, zval *offset, int type, zval *rv); +int dom_modern_nodelist_has_dimension(zend_object *object, zval *member, int check_empty); + +#endif diff --git a/ext/dom/php_dom.c b/ext/dom/php_dom.c index acf1b5a1e9294..c6cead3578d3f 100644 --- a/ext/dom/php_dom.c +++ b/ext/dom/php_dom.c @@ -23,6 +23,7 @@ #include "php.h" #if defined(HAVE_LIBXML) && defined(HAVE_DOM) #include "php_dom.h" +#include "nodelist.h" #include "namespace_compat.h" #include "internal_helpers.h" #include "php_dom_arginfo.h" diff --git a/ext/dom/php_dom.h b/ext/dom/php_dom.h index 54e11f6d47b22..29e390a096413 100644 --- a/ext/dom/php_dom.h +++ b/ext/dom/php_dom.h @@ -177,9 +177,6 @@ xmlDocPtr php_dom_create_html_doc(void); xmlChar *dom_attr_value(const xmlAttr *attr, bool *free); bool dom_compare_value(const xmlAttr *attr, const xmlChar *value); -zval *dom_modern_nodelist_read_dimension(zend_object *object, zval *offset, int type, zval *rv); -int dom_modern_nodelist_has_dimension(zend_object *object, zval *member, int check_empty); - typedef enum { DOM_LOAD_STRING = 0, DOM_LOAD_FILE = 1, @@ -209,9 +206,7 @@ xmlNodePtr php_dom_named_node_map_get_named_item(dom_nnodemap_object *objmap, co void php_dom_named_node_map_get_named_item_into_zval(dom_nnodemap_object *objmap, const zend_string *named, zval *return_value); xmlNodePtr php_dom_named_node_map_get_item(dom_nnodemap_object *objmap, zend_long index); void php_dom_named_node_map_get_item_into_zval(dom_nnodemap_object *objmap, zend_long index, zval *return_value); -void php_dom_nodelist_get_item_into_zval(dom_nnodemap_object *objmap, zend_long index, zval *return_value); int php_dom_get_namednodemap_length(dom_object *obj); -int php_dom_get_nodelist_length(dom_object *obj); xmlNodePtr dom_clone_node(php_dom_libxml_ns_mapper *ns_mapper, xmlNodePtr node, xmlDocPtr doc, bool recursive); From 132fd6c0bc360693f0976a192a30bc79694c3448 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Wed, 10 Apr 2024 19:06:22 +0200 Subject: [PATCH 6/6] Support named items in dimension handling for HTMLCollection --- ext/dom/html_collection.c | 93 ++++++++++++++++--- ext/dom/html_collection.h | 23 +++++ ext/dom/nodelist.c | 2 +- ext/dom/nodelist.h | 2 +- ext/dom/php_dom.c | 8 +- .../HTMLCollection_dimension_errors.phpt | 32 +++++++ .../HTMLCollection_named_reads.phpt | 55 +++++++++-- 7 files changed, 193 insertions(+), 22 deletions(-) create mode 100644 ext/dom/html_collection.h create mode 100644 ext/dom/tests/modern/html/interactions/HTMLCollection_dimension_errors.phpt diff --git a/ext/dom/html_collection.c b/ext/dom/html_collection.c index dfcc304e8c7ee..3b74074559a26 100644 --- a/ext/dom/html_collection.c +++ b/ext/dom/html_collection.c @@ -21,29 +21,33 @@ #include "php.h" #if defined(HAVE_LIBXML) && defined(HAVE_DOM) #include "php_dom.h" +#include "nodelist.h" +#include "html_collection.h" #include "namespace_compat.h" +typedef struct _dom_named_item { + dom_object *context_intern; + xmlNodePtr node; +} dom_named_item; + /* https://dom.spec.whatwg.org/#dom-htmlcollection-nameditem-key */ -PHP_METHOD(DOM_HTMLCollection, namedItem) +static dom_named_item dom_html_collection_named_item(zend_string *key, zend_object *zobj) { - zend_string *key; - ZEND_PARSE_PARAMETERS_START(1, 1) - Z_PARAM_PATH_STR(key) - ZEND_PARSE_PARAMETERS_END(); + dom_named_item ret = {NULL, NULL}; /* 1. If key is the empty string, return null. */ if (ZSTR_LEN(key) == 0) { - RETURN_NULL(); + return ret; } - dom_object *intern = Z_DOMOBJ_P(ZEND_THIS); + dom_object *intern = php_dom_obj_from_obj(zobj); dom_nnodemap_object *objmap = intern->ptr; /* 2. Return the first element in the collection for which at least one of the following is true: */ xmlNodePtr basep = dom_object_get_node(objmap->baseobj); if (basep != NULL) { int cur = 0; - int next = cur; + int next = cur; /* not +1, otherwise we skip the first candidate */ xmlNodePtr candidate = basep->children; while (candidate != NULL) { candidate = dom_get_elements_by_tag_name_ns_raw(basep, candidate, objmap->ns, objmap->local, objmap->local_lower, &cur, next); @@ -55,20 +59,85 @@ PHP_METHOD(DOM_HTMLCollection, namedItem) /* it has an ID which is key; */ if ((attr = xmlHasNsProp(candidate, BAD_CAST "id", NULL)) != NULL && dom_compare_value(attr, BAD_CAST ZSTR_VAL(key))) { - DOM_RET_OBJ(candidate, objmap->baseobj); - return; + ret.context_intern = objmap->baseobj; + ret.node = candidate; + return ret; } /* it is in the HTML namespace and has a name attribute whose value is key; */ else if (php_dom_ns_is_fast(candidate, php_dom_ns_is_html_magic_token)) { if ((attr = xmlHasNsProp(candidate, BAD_CAST "name", NULL)) != NULL && dom_compare_value(attr, BAD_CAST ZSTR_VAL(key))) { - DOM_RET_OBJ(candidate, objmap->baseobj); - return; + ret.context_intern = objmap->baseobj; + ret.node = candidate; + return ret; } } next = cur + 1; } } + + return ret; +} + +static void dom_html_collection_named_item_into_zval(zval *return_value, zend_string *key, zend_object *zobj) +{ + dom_named_item named_item = dom_html_collection_named_item(key, zobj); + if (named_item.node != NULL) { + DOM_RET_OBJ(named_item.node, named_item.context_intern); + } else { + RETURN_NULL(); + } +} + +PHP_METHOD(DOM_HTMLCollection, namedItem) +{ + zend_string *key; + ZEND_PARSE_PARAMETERS_START(1, 1) + Z_PARAM_STR(key) + ZEND_PARSE_PARAMETERS_END(); + dom_html_collection_named_item_into_zval(return_value, key, Z_OBJ_P(ZEND_THIS)); +} + +zval *dom_html_collection_read_dimension(zend_object *object, zval *offset, int type, zval *rv) +{ + if (UNEXPECTED(!offset)) { + zend_throw_error(NULL, "Cannot append to %s", ZSTR_VAL(object->ce->name)); + return NULL; + } + + dom_nodelist_dimension_index index = dom_modern_nodelist_get_index(offset); + if (UNEXPECTED(index.type == DOM_NODELIST_DIM_ILLEGAL)) { + zend_illegal_container_offset(object->ce->name, offset, type); + return NULL; + } + + if (index.type == DOM_NODELIST_DIM_STRING) { + dom_html_collection_named_item_into_zval(rv, index.str, object); + } else { + ZEND_ASSERT(index.type == DOM_NODELIST_DIM_LONG); + php_dom_nodelist_get_item_into_zval(php_dom_obj_from_obj(object)->ptr, index.lval, rv); + } + + return rv; +} + +int dom_html_collection_has_dimension(zend_object *object, zval *member, int check_empty) +{ + /* If it exists, it cannot be empty because nodes aren't empty. */ + ZEND_IGNORE_VALUE(check_empty); + + dom_nodelist_dimension_index index = dom_modern_nodelist_get_index(member); + if (UNEXPECTED(index.type == DOM_NODELIST_DIM_ILLEGAL)) { + zend_illegal_container_offset(object->ce->name, member, BP_VAR_IS); + return 0; + } + + if (index.type == DOM_NODELIST_DIM_STRING) { + return dom_html_collection_named_item(index.str, object).node != NULL; + } else { + ZEND_ASSERT(index.type == DOM_NODELIST_DIM_LONG); + return index.lval >= 0 && index.lval < php_dom_get_nodelist_length(php_dom_obj_from_obj(object)); + } } #endif diff --git a/ext/dom/html_collection.h b/ext/dom/html_collection.h new file mode 100644 index 0000000000000..a94daa1aae805 --- /dev/null +++ b/ext/dom/html_collection.h @@ -0,0 +1,23 @@ +/* + +----------------------------------------------------------------------+ + | Copyright (c) The PHP Group | + +----------------------------------------------------------------------+ + | This source file is subject to version 3.01 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available through the world-wide-web at the following url: | + | https://www.php.net/license/3_01.txt | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. | + +----------------------------------------------------------------------+ + | Authors: Niels Dossche | + +----------------------------------------------------------------------+ +*/ + +#ifndef PHP_HTML_COLLECTION_H +#define PHP_HTML_COLLECTION_H + +zval *dom_html_collection_read_dimension(zend_object *object, zval *offset, int type, zval *rv); +int dom_html_collection_has_dimension(zend_object *object, zval *member, int check_empty); + +#endif diff --git a/ext/dom/nodelist.c b/ext/dom/nodelist.c index beda8939e5531..b615705b621f8 100644 --- a/ext/dom/nodelist.c +++ b/ext/dom/nodelist.c @@ -250,7 +250,7 @@ ZEND_METHOD(DOMNodeList, getIterator) zend_create_internal_iterator_zval(return_value, ZEND_THIS); } -dom_nodelist_dimension_index dom_modern_nodelist_get_index(zval *offset) +dom_nodelist_dimension_index dom_modern_nodelist_get_index(const zval *offset) { dom_nodelist_dimension_index ret; diff --git a/ext/dom/nodelist.h b/ext/dom/nodelist.h index af76e34e41dcc..72264d683f338 100644 --- a/ext/dom/nodelist.h +++ b/ext/dom/nodelist.h @@ -33,7 +33,7 @@ typedef struct _dom_nodelist_dimension_index { void php_dom_nodelist_get_item_into_zval(dom_nnodemap_object *objmap, zend_long index, zval *return_value); int php_dom_get_nodelist_length(dom_object *obj); -dom_nodelist_dimension_index dom_modern_nodelist_get_index(zval *offset); +dom_nodelist_dimension_index dom_modern_nodelist_get_index(const zval *offset); zval *dom_modern_nodelist_read_dimension(zend_object *object, zval *offset, int type, zval *rv); int dom_modern_nodelist_has_dimension(zend_object *object, zval *member, int check_empty); diff --git a/ext/dom/php_dom.c b/ext/dom/php_dom.c index c6cead3578d3f..978fbe96dec01 100644 --- a/ext/dom/php_dom.c +++ b/ext/dom/php_dom.c @@ -24,6 +24,7 @@ #if defined(HAVE_LIBXML) && defined(HAVE_DOM) #include "php_dom.h" #include "nodelist.h" +#include "html_collection.h" #include "namespace_compat.h" #include "internal_helpers.h" #include "php_dom_arginfo.h" @@ -90,6 +91,7 @@ static zend_object_handlers dom_nnodemap_object_handlers; static zend_object_handlers dom_nodelist_object_handlers; static zend_object_handlers dom_modern_nnodemap_object_handlers; static zend_object_handlers dom_modern_nodelist_object_handlers; +static zend_object_handlers dom_html_collection_object_handlers; static zend_object_handlers dom_object_namespace_node_handlers; static zend_object_handlers dom_modern_domimplementation_object_handlers; #ifdef LIBXML_XPATH_ENABLED @@ -715,6 +717,10 @@ PHP_MINIT_FUNCTION(dom) dom_modern_nodelist_object_handlers.read_dimension = dom_modern_nodelist_read_dimension; dom_modern_nodelist_object_handlers.has_dimension = dom_modern_nodelist_has_dimension; + memcpy(&dom_html_collection_object_handlers, &dom_modern_nodelist_object_handlers, sizeof(zend_object_handlers)); + dom_html_collection_object_handlers.read_dimension = dom_html_collection_read_dimension; + dom_html_collection_object_handlers.has_dimension = dom_html_collection_has_dimension; + memcpy(&dom_object_namespace_node_handlers, &dom_object_handlers, sizeof(zend_object_handlers)); dom_object_namespace_node_handlers.offset = XtOffsetOf(dom_object_namespace_node, dom.std); dom_object_namespace_node_handlers.free_obj = dom_object_namespace_node_free_storage; @@ -927,7 +933,7 @@ PHP_MINIT_FUNCTION(dom) dom_html_collection_class_entry = register_class_DOM_HTMLCollection(zend_ce_aggregate, zend_ce_countable); dom_html_collection_class_entry->create_object = dom_nnodemap_objects_new; - dom_html_collection_class_entry->default_object_handlers = &dom_modern_nodelist_object_handlers; + dom_html_collection_class_entry->default_object_handlers = &dom_html_collection_object_handlers; dom_html_collection_class_entry->get_iterator = php_dom_get_iterator; zend_hash_add_new_ptr(&classes, dom_html_collection_class_entry->name, &dom_nodelist_prop_handlers); diff --git a/ext/dom/tests/modern/html/interactions/HTMLCollection_dimension_errors.phpt b/ext/dom/tests/modern/html/interactions/HTMLCollection_dimension_errors.phpt new file mode 100644 index 0000000000000..e68d1ede3863c --- /dev/null +++ b/ext/dom/tests/modern/html/interactions/HTMLCollection_dimension_errors.phpt @@ -0,0 +1,32 @@ +--TEST-- +HTMLCollection::namedItem() and dimension handling for named accesses +--EXTENSIONS-- +dom +--FILE-- +'); + +try { + $dom->getElementsByTagName('root')[][1] = 1; +} catch (Error $e) { + echo $e->getMessage(), "\n"; +} + +try { + $dom->getElementsByTagName('root')[true]; +} catch (Error $e) { + echo $e->getMessage(), "\n"; +} + +try { + isset($dom->getElementsByTagName('root')[true]); +} catch (Error $e) { + echo $e->getMessage(), "\n"; +} + +?> +--EXPECT-- +Cannot append to DOM\HTMLCollection +Cannot access offset of type bool on DOM\HTMLCollection +Cannot access offset of type bool in isset or empty diff --git a/ext/dom/tests/modern/html/interactions/HTMLCollection_named_reads.phpt b/ext/dom/tests/modern/html/interactions/HTMLCollection_named_reads.phpt index 708f618d1a95a..82883e4ceec03 100644 --- a/ext/dom/tests/modern/html/interactions/HTMLCollection_named_reads.phpt +++ b/ext/dom/tests/modern/html/interactions/HTMLCollection_named_reads.phpt @@ -22,20 +22,61 @@ $xml = <<getElementsByTagName('node')->namedItem('foo')?->textContent); -var_dump($dom->getElementsByTagName('node')->namedItem('')?->textContent); -var_dump($dom->getElementsByTagName('node')->namedItem('does not exist')?->textContent); -var_dump($dom->getElementsByTagName('node')->namedItem('wrong')?->textContent); -var_dump($dom->getElementsByTagName('node')->namedItem('bar')?->textContent); -var_dump($dom->getElementsByTagName('x')->namedItem('foo')?->textContent); -var_dump($dom->getElementsByTagName('x')->namedItem('footest')?->textContent); + +function test($obj, $name) { + echo "--- Query \"$name\" ---\n"; + var_dump($obj->namedItem($name)?->textContent); + var_dump($obj[$name]?->textContent); + var_dump(isset($obj[$name])); + + // Search to check for dimension access consistency + $node = $obj[$name]; + if ($node) { + $found = false; + for ($i = 0; $i < $obj->length && !$found; $i++) { + $found = $obj[$i] === $node; + } + if (!$found) { + throw new Error('inconsistency in dimension access'); + } + } +} + +test($dom->getElementsByTagName('node'), 'foo'); +test($dom->getElementsByTagName('node'), ''); +test($dom->getElementsByTagName('node'), 'does not exist'); +test($dom->getElementsByTagName('node'), 'wrong'); +test($dom->getElementsByTagName('node'), 'bar'); +test($dom->getElementsByTagName('x'), 'foo'); +test($dom->getElementsByTagName('x'), 'footest'); ?> --EXPECT-- +--- Query "foo" --- string(1) "5" +string(1) "5" +bool(true) +--- Query "" --- +NULL NULL +bool(false) +--- Query "does not exist" --- NULL +NULL +bool(false) +--- Query "wrong" --- +string(1) "4" string(1) "4" +bool(true) +--- Query "bar" --- +string(12) "with html ns" string(12) "with html ns" +bool(true) +--- Query "foo" --- string(1) "2" +string(1) "2" +bool(true) +--- Query "footest" --- +string(13) "2 with entity" string(13) "2 with entity" +bool(true)