Skip to content

Commit 607a091

Browse files
committed
Account for Etree Elements in HTML Stash
By calling str on all stash elements we ensure they don't raise an error. Worst case, something like `<Element 'div' at 0x000001B2DAE94900>` gets inserted into the output. However, with the override in the md_in_html extension, we actually serialize and reinsert the original HTML. Worst case, an HTML block which should be parsed as Markdown gets skipped by the extension (`<div markdown="block"></div>` gets inserted into the output). The tricky part is testing, as there should be no known cases where this ever occurs. Therefore, we forcefully pass an etree Element directly to the method in the test. That said, as #1040 is unresolved at this point, I have tested locally with a real existing case and it works well. Related to #1040.
1 parent b4a399c commit 607a091

File tree

3 files changed

+33
-2
lines changed

3 files changed

+33
-2
lines changed

markdown/extensions/md_in_html.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
from . import Extension
1818
from ..blockprocessors import BlockProcessor
1919
from ..preprocessors import Preprocessor
20+
from ..postprocessors import RawHtmlPostprocessor
2021
from .. import util
2122
from ..htmlparser import HTMLExtractor
2223
import xml.etree.ElementTree as etree
@@ -263,6 +264,15 @@ def run(self, parent, blocks):
263264
return False
264265

265266

267+
class MarkdownInHTMLPostprocessor(RawHtmlPostprocessor):
    """ Raw HTML postprocessor that also copes with etree elements in the stash. """

    def stash_to_string(self, text):
        """
        Return the string form of a stashed item.

        An `etree.Element` is run through `self.md.serializer`; any other
        object is converted with `str()`.
        """
        # Guard clause: anything that is not an element only needs `str()`.
        if not isinstance(text, etree.Element):
            return str(text)
        return self.md.serializer(text)
274+
275+
266276
class MarkdownInHtmlExtension(Extension):
267277
"""Add Markdown parsing in HTML to Markdown class."""
268278

@@ -275,6 +285,8 @@ def extendMarkdown(self, md):
275285
md.parser.blockprocessors.register(
276286
MarkdownInHtmlProcessor(md.parser), 'markdown_block', 105
277287
)
288+
# Replace raw HTML postprocessor
289+
md.postprocessors.register(MarkdownInHTMLPostprocessor(md), 'raw_html', 30)
278290

279291

280292
def makeExtension(**kwargs): # pragma: no cover

markdown/postprocessors.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ def run(self, text):
6969
""" Iterate over html stash and restore html. """
7070
replacements = OrderedDict()
7171
for i in range(self.md.htmlStash.html_counter):
72-
html = self.md.htmlStash.rawHtmlBlocks[i]
72+
html = self.stash_to_string(self.md.htmlStash.rawHtmlBlocks[i])
7373
if self.isblocklevel(html):
7474
replacements["<p>{}</p>".format(
7575
self.md.htmlStash.get_placeholder(i))] = html
@@ -95,6 +95,10 @@ def isblocklevel(self, html):
9595
return self.md.is_block_level(m.group(1))
9696
return False
9797

98+
def stash_to_string(self, text):
    """
    Return the string form of a stashed object.

    The object is converted with `str()`; it is applied to every raw HTML
    stash entry before the entry is substituted back into the output.
    """
    as_string = str(text)
    return as_string
101+
98102

99103
class AndSubstitutePostprocessor(Postprocessor):
100104
""" Restore valid entities """

tests/test_syntax/extensions/test_md_in_html.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,21 @@
2323
from unittest import TestSuite
2424
from markdown.test_tools import TestCase
2525
from ..blocks.test_html_blocks import TestHTMLBlocks
26+
from markdown import Markdown
27+
from xml.etree.ElementTree import Element
28+
29+
30+
class TestMarkdownInHTMLPostProcessor(TestCase):
    """ Check that elements left in the HTML stash are serialized correctly. """

    def test_stash_to_string(self):
        # No known input leaves an etree Element in the stash, so the element
        # is handed straight to the postprocessor method to exercise that path.
        md = Markdown(extensions=['md_in_html'])
        postprocessor = md.postprocessors['raw_html']
        div = Element('div')
        div.text = 'Foo bar.'
        self.assertEqual(postprocessor.stash_to_string(div), '<div>Foo bar.</div>')
2641

2742

2843
class TestDefaultwMdInHTML(TestHTMLBlocks):
@@ -758,7 +773,7 @@ def test_md1_nested_footnote_ref(self):
758773
def load_tests(loader, tests, pattern):
759774
''' Ensure TestHTMLBlocks doesn't get run twice by excluding it here. '''
760775
suite = TestSuite()
761-
for test_class in [TestDefaultwMdInHTML, TestMdInHTML]:
776+
for test_class in [TestDefaultwMdInHTML, TestMdInHTML, TestMarkdownInHTMLPostProcessor]:
762777
tests = loader.loadTestsFromTestCase(test_class)
763778
suite.addTests(tests)
764779
return suite

0 commit comments

Comments
 (0)