From 07459c6f7276f7f615bd91f0f861c5181e1bfd84 Mon Sep 17 00:00:00 2001 From: "Christoph M. Becker" Date: Wed, 7 Oct 2020 12:45:43 +0200 Subject: [PATCH 1/3] Fix #77040: tidyNode::isHtml() is completely broken The documentation of `tidyNode::isHtml()` states that this method "checks if a node is part of a HTML document". That is, of course, nonsense, since a tidyNode is "an HTML node in an HTML file, as detected by tidy." What this method is actually supposed to do is to check whether a node is an element (unless it is the root element). This has been broken by commit d8eeb8e[1], which assumed that `enum TidyNodeType` would represent flags of a bitmask, which it does not. [1] --- ext/tidy/tests/bug77040.phpt | 17 +++++++++++++++++ ext/tidy/tidy.c | 13 ++++++++----- 2 files changed, 25 insertions(+), 5 deletions(-) create mode 100644 ext/tidy/tests/bug77040.phpt diff --git a/ext/tidy/tests/bug77040.phpt b/ext/tidy/tests/bug77040.phpt new file mode 100644 index 0000000000000..6357fe3243cce --- /dev/null +++ b/ext/tidy/tests/bug77040.phpt @@ -0,0 +1,17 @@ +--TEST-- +Bug #77040 (tidyNode::isHtml() is completely broken) +--SKIPIF-- + +--FILE-- +parseString("

text

"); +$p = $tidy->body()->child[0]->child[0]; +var_dump($p->type === TIDY_NODETYPE_TEXT); +var_dump($p->isHtml()); +?> +--EXPECT-- +bool(true) +bool(false) diff --git a/ext/tidy/tidy.c b/ext/tidy/tidy.c index 0fde23cb11b48..1397625477ea1 100644 --- a/ext/tidy/tidy.c +++ b/ext/tidy/tidy.c @@ -1785,12 +1785,15 @@ static TIDY_NODE_METHOD(isComment) static TIDY_NODE_METHOD(isHtml) { TIDY_FETCH_ONLY_OBJECT; - - if (tidyNodeGetType(obj->node) & (TidyNode_Start | TidyNode_End | TidyNode_StartEnd)) { - RETURN_TRUE; + int type; + switch (type = tidyNodeGetType(obj->node)) { + case TidyNode_Start: + case TidyNode_End: + case TidyNode_StartEnd: + RETURN_TRUE; + default: + RETURN_FALSE; } - - RETURN_FALSE; } /* }}} */ From c2da71d17d8bb9663d9d7ef24468a9f8e6d15c17 Mon Sep 17 00:00:00 2001 From: "Christoph M. Becker" Date: Wed, 7 Oct 2020 15:02:13 +0200 Subject: [PATCH 2/3] Remove debug code --- ext/tidy/tidy.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ext/tidy/tidy.c b/ext/tidy/tidy.c index 1397625477ea1..60170585ce591 100644 --- a/ext/tidy/tidy.c +++ b/ext/tidy/tidy.c @@ -1785,8 +1785,8 @@ static TIDY_NODE_METHOD(isComment) static TIDY_NODE_METHOD(isHtml) { TIDY_FETCH_ONLY_OBJECT; - int type; - switch (type = tidyNodeGetType(obj->node)) { + + switch (tidyNodeGetType(obj->node)) { case TidyNode_Start: case TidyNode_End: case TidyNode_StartEnd: From 6e7a6bc785529f27f4622aca4c9208d34e01e38f Mon Sep 17 00:00:00 2001 From: "Christoph M. Becker" Date: Wed, 7 Oct 2020 15:13:33 +0200 Subject: [PATCH 3/3] Add further test clauses --- ext/tidy/tests/bug77040.phpt | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/ext/tidy/tests/bug77040.phpt b/ext/tidy/tests/bug77040.phpt index 6357fe3243cce..a1af00bdfb70f 100644 --- a/ext/tidy/tests/bug77040.phpt +++ b/ext/tidy/tests/bug77040.phpt @@ -7,11 +7,21 @@ if (!extension_loaded('tidy')) die('skip tidy extension not available'); --FILE-- parseString("

text

"); -$p = $tidy->body()->child[0]->child[0]; -var_dump($p->type === TIDY_NODETYPE_TEXT); +$tidy->parseString("

text

"); +$p = $tidy->body()->child[0]; +var_dump($p->type === TIDY_NODETYPE_START); var_dump($p->isHtml()); +$text = $p->child[0]; +var_dump($text->type === TIDY_NODETYPE_TEXT); +var_dump($text->isHtml()); +$cdata = $tidy->body()->child[1]->child[0]; +var_dump($cdata->type === TIDY_NODETYPE_CDATA); +var_dump($cdata->isHtml()); ?> --EXPECT-- bool(true) +bool(true) +bool(true) bool(false) +bool(true) +bool(false) \ No newline at end of file