From 3cb912834cd310b3b79cb4921e8ce89e007dde61 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Sat, 6 Apr 2024 14:22:27 +0200 Subject: [PATCH] Cleanup and optimize attribute value reading MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the attribute has a single text child, we can avoid an allocating call to libxml2 and read the contents directly. On my i7-4790, I tested the optimization with both the $value and $nodeValue property. ``` Summary ./sapi/cli/php bench_value.php ran 1.82 ± 0.09 times faster than ./sapi/cli/php_old bench_value.php Summary ./sapi/cli/php bench_nodeValue.php ran 1.78 ± 0.10 times faster than ./sapi/cli/php_old bench_nodeValue.php ``` Test code: ``` $dom = new DOMDocument; $dom->loadXML(''); $attrib = $dom->documentElement->attributes[0]; for ($i=0; $i<1000*1000; $i++) { $attrib->value; // or nodeValue } ``` --- ext/dom/attr.c | 14 ++-------- ext/dom/php_dom.c | 66 ++++++++++++++++++++++++++++++++--------------- 2 files changed, 47 insertions(+), 33 deletions(-) diff --git a/ext/dom/attr.c b/ext/dom/attr.c index 302e52acb83c..e48d124976c2 100644 --- a/ext/dom/attr.c +++ b/ext/dom/attr.c @@ -113,19 +113,9 @@ URL: http://www.w3.org/TR/2003/WD-DOM-Level-3-Core-20030226/DOM3-Core.html#ID-22 */ zend_result dom_attr_value_read(dom_object *obj, zval *retval) { - DOM_PROP_NODE(xmlAttrPtr, attrp, obj); - xmlChar *content; - - /* Can't avoid a content copy because it's an attribute node */ - if ((content = xmlNodeGetContent((xmlNodePtr) attrp)) != NULL) { - ZVAL_STRING(retval, (char *) content); - xmlFree(content); - } else { - ZVAL_EMPTY_STRING(retval); - } - + DOM_PROP_NODE(xmlNodePtr, attrp, obj); + php_dom_get_content_into_zval(attrp, retval, false); return SUCCESS; - } zend_result dom_attr_value_write(dom_object *obj, zval *newval) diff --git a/ext/dom/php_dom.c b/ext/dom/php_dom.c index 711ea29d125f..dce9a0fb490c 100644 --- a/ext/dom/php_dom.c +++ b/ext/dom/php_dom.c @@ -2255,36 +2255,60 @@ void dom_remove_all_children(xmlNodePtr nodep) } } -void php_dom_get_content_into_zval(const xmlNode *nodep, zval *retval, bool default_is_null) +void php_dom_get_content_into_zval(const xmlNode *nodep, zval *return_value, bool null_on_failure) { ZEND_ASSERT(nodep != NULL); - if (nodep->type == XML_TEXT_NODE - || nodep->type == XML_CDATA_SECTION_NODE - || nodep->type == XML_PI_NODE - || nodep->type == XML_COMMENT_NODE) { - char *str = (char * ) nodep->content; - if (str != NULL) { - ZVAL_STRING(retval, str); - } else { - goto failure; + switch (nodep->type) { + case XML_TEXT_NODE: + case XML_CDATA_SECTION_NODE: + case XML_PI_NODE: + case XML_COMMENT_NODE: { + char *str = (char * ) nodep->content; + if (str != NULL) { + RETURN_STRING(str); + } + + break; } - return; - } - char *str = (char *) xmlNodeGetContent(nodep); + case XML_ATTRIBUTE_NODE: { + /* For attributes we can also have an optimized fast-path. + * This fast-path is only possible in the (common) case where the attribute + * has a single text child. Note that if the child or the content is NULL, this + * is equivalent to not having content (i.e. the attribute has the empty string as value). */ - if (str != NULL) { - ZVAL_STRING(retval, str); - xmlFree(str); - return; + if (nodep->children == NULL) { + RETURN_EMPTY_STRING(); + } + + if (nodep->children->type == XML_TEXT_NODE && nodep->children->next == NULL) { + if (nodep->children->content == NULL) { + RETURN_EMPTY_STRING(); + } else { + RETURN_STRING((const char *) nodep->children->content); + } + } + + ZEND_FALLTHROUGH; + } + + default: { + char *str = (char *) xmlNodeGetContent(nodep); + if (str != NULL) { + RETVAL_STRING(str); + xmlFree(str); + return; + } + + break; + } } -failure: - if (default_is_null) { - ZVAL_NULL(retval); + if (null_on_failure) { + RETURN_NULL(); } else { - ZVAL_EMPTY_STRING(retval); + RETURN_EMPTY_STRING(); } }