Skip to content

Commit 0e34ac8

Browse files
committed
Fix bug #77686: Removed elements are still returned by getElementById
From the moment an ID is created, libxml2's behaviour is to cache that element, even if that element is not yet attached to the document. Similarly, only upon destruction of the element the ID is actually removed by libxml2. Since libxml2 has such behaviour deeply ingrained in the library, and uses the cache for various purposes, it seems like a bad idea and lost cause to fight it. Instead, we'll simply walk the tree upwards to check if the node is attached to the document. Closes GH-11369.
1 parent 23f7002 commit 0e34ac8

File tree

3 files changed

+62
-1
lines changed

3 files changed

+62
-1
lines changed

NEWS

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ PHP NEWS
2525
namespaces). (nielsdos)
2626
. Fixed bug #81642 (DOMChildNode::replaceWith() bug when replacing a node
2727
with itself). (nielsdos)
28+
. Fixed bug #77686 (Removed elements are still returned by getElementById).
29+
(nielsdos)
2830

2931
- Opcache:
3032
. Fix allocation loop in zend_shared_alloc_startup(). (nielsdos)

ext/dom/document.c

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1008,6 +1008,19 @@ PHP_METHOD(DOMDocument, getElementsByTagNameNS)
10081008
}
10091009
/* }}} end dom_document_get_elements_by_tag_name_ns */
10101010

1011+
static bool php_dom_is_node_attached(const xmlNode *node)
1012+
{
1013+
ZEND_ASSERT(node != NULL);
1014+
node = node->parent;
1015+
while (node != NULL) {
1016+
if (node->type == XML_DOCUMENT_NODE || node->type == XML_HTML_DOCUMENT_NODE) {
1017+
return true;
1018+
}
1019+
node = node->parent;
1020+
}
1021+
return false;
1022+
}
1023+
10111024
/* {{{ URL: http://www.w3.org/TR/2003/WD-DOM-Level-3-Core-20030226/DOM3-Core.html#core-ID-getElBId
10121025
Since: DOM Level 2
10131026
*/
@@ -1030,7 +1043,13 @@ PHP_METHOD(DOMDocument, getElementById)
10301043

10311044
attrp = xmlGetID(docp, (xmlChar *) idname);
10321045

1033-
if (attrp && attrp->parent) {
1046+
/* From the moment an ID is created, libxml2's behaviour is to cache that element, even
1047+
* if that element is not yet attached to the document. Similarly, only upon destruction of
1048+
* the element the ID is actually removed by libxml2. Since libxml2 has such behaviour deeply
1049+
* ingrained in the library, and uses the cache for various purposes, it seems like a bad
1050+
* idea and lost cause to fight it. Instead, we'll simply walk the tree upwards to check
1051+
* if the node is attached to the document. */
1052+
if (attrp && attrp->parent && php_dom_is_node_attached(attrp->parent)) {
10341053
DOM_RET_OBJ((xmlNodePtr) attrp->parent, &ret, intern);
10351054
} else {
10361055
RETVAL_NULL();

ext/dom/tests/bug77686.phpt

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
--TEST--
2+
Bug #77686 (Removed elements are still returned by getElementById)
3+
--EXTENSIONS--
4+
dom
5+
--FILE--
6+
<?php
7+
8+
$doc = new DOMDocument;
9+
$doc->loadHTML('<html id="htmlelement"><body id="x">before<div id="y">hello</div>after</body></html>');
10+
$body = $doc->getElementById('x');
11+
$div = $doc->getElementById('y');
12+
var_dump($doc->getElementById('y')->textContent);
13+
14+
// Detached from document, should not find it anymore
15+
$body->removeChild($div);
16+
var_dump($doc->getElementById('y'));
17+
18+
// Added again, should find it
19+
$body->appendChild($div);
20+
var_dump($doc->getElementById('y')->textContent);
21+
22+
// Should find root element without a problem
23+
var_dump($doc->getElementById('htmlelement')->textContent);
24+
25+
// Created element but not yet attached, should not find it before it is added
26+
$new_element = $doc->createElement('p');
27+
$new_element->textContent = 'my new text';
28+
$new_element->setAttribute('id', 'myp');
29+
var_dump($doc->getElementById('myp'));
30+
$body->appendChild($new_element);
31+
var_dump($doc->getElementById('myp')->textContent);
32+
33+
?>
34+
--EXPECT--
35+
string(5) "hello"
36+
NULL
37+
string(5) "hello"
38+
string(16) "beforeafterhello"
39+
NULL
40+
string(11) "my new text"

0 commit comments

Comments
 (0)