From fcd2a55a6c1f90256e4461b2e3290d4984d9311e Mon Sep 17 00:00:00 2001 From: Waylan Limberg Date: Tue, 17 Nov 2020 14:12:39 -0500 Subject: [PATCH 1/6] Properly parse code spans in md_in_html This reverts part of 2766698 and causes other tests to break. Although, those should be addressed in the same manner as in the core (by using the intail attribute). A partial fix for 1068. --- markdown/extensions/md_in_html.py | 15 ++-------- .../test_syntax/extensions/test_md_in_html.py | 30 +++++++++++++++++++ 2 files changed, 33 insertions(+), 12 deletions(-) diff --git a/markdown/extensions/md_in_html.py b/markdown/extensions/md_in_html.py index eb8902e07..3bd62826e 100644 --- a/markdown/extensions/md_in_html.py +++ b/markdown/extensions/md_in_html.py @@ -85,17 +85,9 @@ def get_state(self, tag, attrs): else: # pragma: no cover return None - def at_line_start(self): - """At line start.""" - - value = super().at_line_start() - if not value and self.cleandoc and self.cleandoc[-1].endswith('\n'): - value = True - return value - def handle_starttag(self, tag, attrs): # Handle tags that should always be empty and do not specify a closing tag - if tag in self.empty_tags: + if tag in self.empty_tags and self.at_line_start(): attrs = {key: value if value is not None else key for key, value in attrs} if "markdown" in attrs: attrs.pop('markdown') @@ -106,13 +98,12 @@ def handle_starttag(self, tag, attrs): self.handle_empty_tag(data, True) return - if tag in self.block_level_tags: + if tag in self.block_level_tags and self.at_line_start(): # Valueless attr (ex: ``) results in `[('checked', None)]`. # Convert to `{'checked': 'checked'}`. 
attrs = {key: value if value is not None else key for key, value in attrs} state = self.get_state(tag, attrs) - - if self.inraw or (state in [None, 'off'] and not self.mdstack) or not self.at_line_start(): + if self.inraw or (state in [None, 'off'] and not self.mdstack): # fall back to default behavior attrs.pop('markdown', None) super().handle_starttag(tag, attrs) diff --git a/tests/test_syntax/extensions/test_md_in_html.py b/tests/test_syntax/extensions/test_md_in_html.py index 824917c06..519fee070 100644 --- a/tests/test_syntax/extensions/test_md_in_html.py +++ b/tests/test_syntax/extensions/test_md_in_html.py @@ -126,6 +126,36 @@ def test_md1_div_linebreaks(self): ) ) + def test_md1_code_span(self): + self.assertMarkdownRenders( + self.dedent( + """ +
+ `

code span

` +
+ """ + ), + self.dedent( + """ +
+

<h1>code span</h1>

+
+ """ + ) + ) + + def test_md1_code_span_oneline(self): + self.assertMarkdownRenders( + '
`

code span

`
', + self.dedent( + """ +
+

<h1>code span</h1>

+
+ """ + ) + ) + def test_md1_div_blank_lines(self): self.assertMarkdownRenders( self.dedent( From 37386563ee45f5efb42df51e481db0ffc00f5c5a Mon Sep 17 00:00:00 2001 From: Waylan Limberg Date: Tue, 17 Nov 2020 15:15:47 -0500 Subject: [PATCH 2/6] handle tails in md_in_html --- markdown/extensions/md_in_html.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/markdown/extensions/md_in_html.py b/markdown/extensions/md_in_html.py index 3bd62826e..259420f38 100644 --- a/markdown/extensions/md_in_html.py +++ b/markdown/extensions/md_in_html.py @@ -19,7 +19,7 @@ from ..preprocessors import Preprocessor from ..postprocessors import RawHtmlPostprocessor from .. import util -from ..htmlparser import HTMLExtractor +from ..htmlparser import HTMLExtractor, blank_line_re import xml.etree.ElementTree as etree @@ -87,7 +87,7 @@ def get_state(self, tag, attrs): def handle_starttag(self, tag, attrs): # Handle tags that should always be empty and do not specify a closing tag - if tag in self.empty_tags and self.at_line_start(): + if tag in self.empty_tags and (self.at_line_start() or self.intail): attrs = {key: value if value is not None else key for key, value in attrs} if "markdown" in attrs: attrs.pop('markdown') @@ -98,7 +98,7 @@ def handle_starttag(self, tag, attrs): self.handle_empty_tag(data, True) return - if tag in self.block_level_tags and self.at_line_start(): + if tag in self.block_level_tags and (self.at_line_start() or self.intail): # Valueless attr (ex: ``) results in `[('checked', None)]`. # Convert to `{'checked': 'checked'}`. attrs = {key: value if value is not None else key for key, value in attrs} @@ -150,6 +150,11 @@ def handle_endtag(self, tag): self.cleandoc.append(self.md.htmlStash.store(element)) self.cleandoc.append('\n\n') self.state = [] + # Check if element has a tail + if not blank_line_re.match( + self.rawdata[self.line_offset + self.offset + len(self.get_endtag_text(tag)):]): + # More content exists after endtag. 
+ self.intail = True else: # Treat orphan closing tag as a span level tag. text = self.get_endtag_text(tag) @@ -182,6 +187,8 @@ def handle_startendtag(self, tag, attrs): self.handle_empty_tag(data, is_block=self.md.is_block_level(tag)) def handle_data(self, data): + if self.intail and '\n' in data: + self.intail = False if self.inraw or not self.mdstack: super().handle_data(data) else: From 99cbfbe7698a0f0fcb445fb037a1c0c4f620b6b7 Mon Sep 17 00:00:00 2001 From: Waylan Limberg Date: Wed, 18 Nov 2020 09:32:04 -0500 Subject: [PATCH 3/6] Ensure line_offset doesn't error See also #1066. --- markdown/htmlparser.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/markdown/htmlparser.py b/markdown/htmlparser.py index fee9cd509..b0f113ba1 100644 --- a/markdown/htmlparser.py +++ b/markdown/htmlparser.py @@ -91,8 +91,14 @@ def close(self): @property def line_offset(self): """Returns char index in self.rawdata for the start of the current line. """ - if self.lineno > 1: - return re.match(r'([^\n]*\n){{{}}}'.format(self.lineno-1), self.rawdata).end() + if self.lineno > 1 and '\n' in self.rawdata: + m = re.match(r'([^\n]*\n){{{}}}'.format(self.lineno-1), self.rawdata) + if m: + return m.end() + else: + # Value of self.lineno must exceed total number of lines. + # Find index of beginning of last line. 
+ return self.rawdata.rfind('\n') return 0 def at_line_start(self): From a8a1527458b64c685c3efccd27748d0317dce655 Mon Sep 17 00:00:00 2001 From: Waylan Limberg Date: Wed, 18 Nov 2020 11:37:50 -0500 Subject: [PATCH 4/6] Handle script tag in code span in md_in_html --- markdown/extensions/md_in_html.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/markdown/extensions/md_in_html.py b/markdown/extensions/md_in_html.py index 259420f38..b8848efdd 100644 --- a/markdown/extensions/md_in_html.py +++ b/markdown/extensions/md_in_html.py @@ -125,6 +125,9 @@ def handle_starttag(self, tag, attrs): self.handle_data(self.md.htmlStash.store(text)) else: self.handle_data(text) + if tag in self.CDATA_CONTENT_ELEMENTS: + # This is presumably a standalone tag in a code span (see #1036). + self.clear_cdata_mode() def handle_endtag(self, tag): if tag in self.block_level_tags: From 80e313d848cdadf758d78f0cb2ebffa55bc7f627 Mon Sep 17 00:00:00 2001 From: Waylan Limberg Date: Wed, 18 Nov 2020 11:40:50 -0500 Subject: [PATCH 5/6] mend --- .../test_syntax/extensions/test_md_in_html.py | 36 +++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/tests/test_syntax/extensions/test_md_in_html.py b/tests/test_syntax/extensions/test_md_in_html.py index 519fee070..7786b80e5 100644 --- a/tests/test_syntax/extensions/test_md_in_html.py +++ b/tests/test_syntax/extensions/test_md_in_html.py @@ -156,6 +156,42 @@ def test_md1_code_span_oneline(self): ) ) + def test_md1_code_span_unclosed(self): + self.assertMarkdownRenders( + self.dedent( + """ +
+ `

` +

+ """ + ), + self.dedent( + """ +
+

<p>

+
+ """ + ) + ) + + def test_md1_code_span_script_tag(self): + self.assertMarkdownRenders( + self.dedent( + """ +
+ `