/*
 * Patch overview (unified git diff, three files). Text placed before the first
 * "diff --git" header is commentary only.
 *
 * ext/dom/lexbor/lexbor/selectors-adapted/selectors.c
 *   - lxb_selectors_adapted_attr(): the single xmlHasProp() lookup becomes a
 *     two-way branch. When php_dom_ns_is_html_and_document_is_html(node) holds,
 *     node->properties is walked and the first attribute for which
 *     lexbor_str_data_nlocmp_right(cur->name, name, name_bound) succeeds is
 *     taken; name_bound is strlen(name) + 1, so the terminating NUL takes part
 *     in the comparison. Otherwise xmlHasProp() is still used. In both cases an
 *     attribute with a non-NULL ns still makes the function return NULL.
 *     NOTE(review): lexbor_str_data_nlocmp_right() is presumably an ASCII
 *     case-insensitive comparison; its definition is not part of this patch,
 *     confirm against lexbor.
 *   - New helper lxb_selectors_attrib_name_cmp(): true when the selector name
 *     has length len and lexbor_str_data_nlocmp_right() accepts it against the
 *     given literal.
 *   - New helper lxb_selectors_is_lowercased_html_attrib_name(): ORs that
 *     helper over the attribute names listed in the HTML specification section
 *     quoted in the in-code comment ("accept" through "vlink").
 *   - lxb_selectors_adapted_set_entry_id_ex(): stores the helper's result in
 *     entry->id.attr_case_insensitive before the existing dictionary lookup.
 *   - lxb_selectors_match_attribute_value(): gains a force_modifier_i
 *     parameter that is ORed with the LXB_CSS_SELECTOR_MODIFIER_I check.
 *   - lxb_selectors_match_attribute(): passes
 *     entry->id.attr_case_insensitive && php_dom_ns_is_html_and_document_is_html(node)
 *     as that new argument.
 *
 * ext/dom/lexbor/lexbor/selectors-adapted/selectors.h
 *   - lxb_selectors_adapted_id gains the bool field attr_case_insensitive.
 *
 * ext/dom/tests/modern/css_selectors/gh17802.phpt (new file)
 *   - Runs four querySelectorAll() calls: meta[charseT], meta[charset], and
 *     both again with ="windows-1252", over a document that also gets a meta
 *     element created in the urn:x namespace with charset set to x.
 *
 * NOTE(review): in this copy the heredoc body after --FILE-- and the quoted
 * values after --EXPECT-- are empty although the expectations state
 * string(29) and string(25); the markup looks like it was lost, and the hunks
 * are no longer one line per diff line. Restore both from the original commit
 * before trying to apply this patch.
 */
diff --git a/ext/dom/lexbor/lexbor/selectors-adapted/selectors.c b/ext/dom/lexbor/lexbor/selectors-adapted/selectors.c index 8bd996a3910fe..3a40318628f10 100644 --- a/ext/dom/lexbor/lexbor/selectors-adapted/selectors.c +++ b/ext/dom/lexbor/lexbor/selectors-adapted/selectors.c @@ -65,7 +65,21 @@ static zend_always_inline bool lxb_selectors_adapted_cmp_local_name_id(const xml static zend_always_inline const xmlAttr *lxb_selectors_adapted_attr(const xmlNode *node, const lxb_char_t *name) { - const xmlAttr *attr = xmlHasProp(node, (const xmlChar *) name); + const xmlAttr *attr = NULL; + ZEND_ASSERT(node->doc != NULL); + if (php_dom_ns_is_html_and_document_is_html(node)) { + /* No need to handle DTD entities as we're in HTML. */ + size_t name_bound = strlen((const char *) name) + 1; + for (const xmlAttr *cur = node->properties; cur != NULL; cur = cur->next) { + if (lexbor_str_data_nlocmp_right(cur->name, name, name_bound)) { + attr = cur; + break; + } + } + } else { + attr = xmlHasProp(node, (const xmlChar *) name); + } + if (attr != NULL && attr->ns != NULL) { return NULL; } @@ -85,8 +99,67 @@ static zend_always_inline dom_lxb_str_wrapper lxb_selectors_adapted_attr_value(c return ret; } +static bool lxb_selectors_attrib_name_cmp(const lxb_css_selector_t *selector, const char *name, size_t len) +{ + return selector->name.length == len && lexbor_str_data_nlocmp_right((const lxb_char_t *) name, selector->name.data, len); +} + +/* From https://html.spec.whatwg.org/#case-sensitivity-of-selectors + * "Attribute selectors on an HTML element in an HTML document must treat the values of attributes with the following names as ASCII case-insensitive:" */ +static bool lxb_selectors_is_lowercased_html_attrib_name(const lxb_css_selector_t *selector) +{ + return lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("accept")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("accept-charset")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("align")) + ||
lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("alink")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("axis")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("bgcolor")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("charset")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("checked")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("clear")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("codetype")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("color")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("compact")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("declare")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("defer")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("dir")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("direction")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("disabled")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("enctype")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("face")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("frame")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("hreflang")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("http-equiv")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("lang")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("language")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("link")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("media")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("method")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("multiple")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("nohref")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("noresize")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("noshade")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("nowrap")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("readonly")) + ||
lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("rel")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("rev")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("rules")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("scope")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("scrolling")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("selected")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("shape")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("target")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("text")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("type")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("valign")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("valuetype")) + || lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("vlink")); +} + static void lxb_selectors_adapted_set_entry_id_ex(lxb_selectors_entry_t *entry, const lxb_css_selector_t *selector, const xmlNode *node) { + entry->id.attr_case_insensitive = lxb_selectors_is_lowercased_html_attrib_name(selector); + if (node->doc != NULL && node->doc->dict != NULL) { const xmlChar *interned = xmlDictExists(node->doc->dict, selector->name.data, selector->name.length); if (interned != NULL) { @@ -1290,10 +1363,10 @@ lxb_selectors_match_class(const lexbor_str_t *target, const lexbor_str_t *src, } static bool -lxb_selectors_match_attribute_value(const lxb_css_selector_attribute_t *attr, const lexbor_str_t *trg, const lexbor_str_t *src) +lxb_selectors_match_attribute_value(const lxb_css_selector_attribute_t *attr, bool force_modifier_i, const lexbor_str_t *trg, const lexbor_str_t *src) { bool res; - bool ins = attr->modifier == LXB_CSS_SELECTOR_MODIFIER_I; + bool ins = attr->modifier == LXB_CSS_SELECTOR_MODIFIER_I || force_modifier_i; switch (attr->match) { case LXB_CSS_SELECTOR_MATCH_EQUAL: /* = */ @@ -1405,7 +1478,13 @@ lxb_selectors_match_attribute(const lxb_css_selector_t *selector, } dom_lxb_str_wrapper trg =
lxb_selectors_adapted_attr_value(dom_attr); - bool res = lxb_selectors_match_attribute_value(attr, &trg.str, src); + ZEND_ASSERT(node->doc != NULL); + bool res = lxb_selectors_match_attribute_value( + attr, + entry->id.attr_case_insensitive && php_dom_ns_is_html_and_document_is_html(node), + &trg.str, + src + ); dom_lxb_str_wrapper_release(&trg); return res; } diff --git a/ext/dom/lexbor/lexbor/selectors-adapted/selectors.h b/ext/dom/lexbor/lexbor/selectors-adapted/selectors.h index 441976b1e3d41..9057fae684189 100644 --- a/ext/dom/lexbor/lexbor/selectors-adapted/selectors.h +++ b/ext/dom/lexbor/lexbor/selectors-adapted/selectors.h @@ -78,6 +78,7 @@ typedef lxb_selectors_entry_t * typedef struct { const xmlChar *name; bool interned; + bool attr_case_insensitive; } lxb_selectors_adapted_id; struct lxb_selectors_entry { diff --git a/ext/dom/tests/modern/css_selectors/gh17802.phpt b/ext/dom/tests/modern/css_selectors/gh17802.phpt new file mode 100644 index 0000000000000..2af1dab622836 --- /dev/null +++ b/ext/dom/tests/modern/css_selectors/gh17802.phpt @@ -0,0 +1,63 @@ +--TEST-- +GH-17802 (\Dom\HTMLDocument querySelector attribute name is case sensitive in HTML) +--EXTENSIONS-- +dom +--FILE-- + + + + + + + +TEXT; + +$dom = \Dom\HTMLDocument::createFromString($text, options: LIBXML_NOERROR); +$meta2 = $dom->head->appendChild($dom->createElementNS('urn:x', 'meta')); +$meta2->setAttribute('charset', 'x'); +echo $dom->saveHtml(), "\n"; + +echo "--- charseT ---\n"; + +foreach ($dom->querySelectorAll('meta[charseT]') as $entry) { + var_dump($dom->saveHtml($entry)); +} + +echo "--- charset ---\n"; + +foreach ($dom->querySelectorAll('meta[charset]') as $entry) { + var_dump($dom->saveHtml($entry)); +} + +echo "--- charseT and lowercase value ---\n"; + +foreach ($dom->querySelectorAll('meta[charseT="windows-1252"]') as $entry) { + var_dump($dom->saveHtml($entry)); +} + +echo "--- charset and lowercase value ---\n"; + +foreach
($dom->querySelectorAll('meta[charset="windows-1252"]') as $entry) { + var_dump($dom->saveHtml($entry)); +} + +?> +--EXPECT-- + + + + + + +--- charseT --- +string(29) "" +--- charset --- +string(29) "" +string(25) "" +--- charseT and lowercase value --- +string(29) "" +--- charset and lowercase value --- +string(29) ""