From 692e9b74f063c717901108ac5479111b88824b15 Mon Sep 17 00:00:00 2001 From: Waylan Limberg Date: Wed, 3 Jan 2024 12:04:10 -0500 Subject: [PATCH 1/3] Fix handling of bogus comments. As with most implementations, we now pass through bogus comments (as defined by the HTML Spec) unaltered except that they are HTML escaped. This deviates from the reference implementation which completely ignores them. As the reference implementation seems to not have even contemplated their existence, it is not being used as a reference in this instance. Fixes #1425. --- docs/changelog.md | 1 + markdown/htmlparser.py | 9 +++++++++ tests/test_syntax/blocks/test_html_blocks.py | 16 ++++++++-------- 3 files changed, 18 insertions(+), 8 deletions(-) diff --git a/docs/changelog.md b/docs/changelog.md index 84f0bfaaf..53104f60d 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -20,6 +20,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 * Fix edge-case crash in `InlineProcessor` with `AtomicString` (#1406). * Fix edge-case crash in `codehilite` with an empty `code` tag (#1405). * Improve and expand type annotations in the code base (#1401). +* Fix handling of bogus comments (#1425). ## [3.5.1] -- 2023-10-31 diff --git a/markdown/htmlparser.py b/markdown/htmlparser.py index 5155ef69d..902746f5a 100644 --- a/markdown/htmlparser.py +++ b/markdown/htmlparser.py @@ -277,6 +277,15 @@ def parse_html_declaration(self, i: int) -> int: self.handle_data(' int: + # Override the default dehavior so that bogus comments get passed + # through unaltered by setting `report` to `0` (see #1425). + pos = super().parse_bogus_comment(i, report) + if pos == -1: + return -1 + self.handle_empty_tag(self.rawdata[i:pos], is_block=False) + return pos + # The rest has been copied from base class in standard lib to address #1036. # As `__startag_text` is private, all references to it must be in this subclass. 
# The last few lines of `parse_starttag` are reversed so that `handle_starttag` diff --git a/tests/test_syntax/blocks/test_html_blocks.py b/tests/test_syntax/blocks/test_html_blocks.py index 22b949834..85b0c48de 100644 --- a/tests/test_syntax/blocks/test_html_blocks.py +++ b/tests/test_syntax/blocks/test_html_blocks.py @@ -782,16 +782,16 @@ def test_raw_comment_trailing_whitespace(self): '' ) - # Note: this is a change in behavior for Python-Markdown, which does *not* match the reference - # implementation. However, it does match the HTML5 spec. Declarations must start with either - # `', - '' + '', + '

<!invalid>

' + ) + + def test_bogus_comment_endtag(self): + self.assertMarkdownRenders( + '', + '

</#invalid>

' ) def test_raw_multiline_comment(self): From 31729bacdbb9ece60b4bf6473175958a016aa2fb Mon Sep 17 00:00:00 2001 From: Waylan Limberg Date: Wed, 3 Jan 2024 12:12:29 -0500 Subject: [PATCH 2/3] lint cleanup --- markdown/htmlparser.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/markdown/htmlparser.py b/markdown/htmlparser.py index 902746f5a..c9fa0b387 100644 --- a/markdown/htmlparser.py +++ b/markdown/htmlparser.py @@ -277,8 +277,8 @@ def parse_html_declaration(self, i: int) -> int: self.handle_data(' int: - # Override the default dehavior so that bogus comments get passed + def parse_bogus_comment(self, i: int, report: int = 0) -> int: + # Override the default behavior so that bogus comments get passed # through unaltered by setting `report` to `0` (see #1425). pos = super().parse_bogus_comment(i, report) if pos == -1: From 20126c0e12340af53559955601c576177df8c930 Mon Sep 17 00:00:00 2001 From: Waylan Limberg Date: Wed, 3 Jan 2024 13:01:37 -0500 Subject: [PATCH 3/3] coverage --- markdown/htmlparser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/markdown/htmlparser.py b/markdown/htmlparser.py index c9fa0b387..33b918d54 100644 --- a/markdown/htmlparser.py +++ b/markdown/htmlparser.py @@ -281,7 +281,7 @@ def parse_bogus_comment(self, i: int, report: int = 0) -> int: # Override the default behavior so that bogus comments get passed # through unaltered by setting `report` to `0` (see #1425). pos = super().parse_bogus_comment(i, report) - if pos == -1: + if pos == -1: # pragma: no cover return -1 self.handle_empty_tag(self.rawdata[i:pos], is_block=False) return pos