Skip to content

Commit 2766698

Browse files
authored
Properly parse inline HTML in md_in_html
Fixes #1040 and fixes #1045.
1 parent 607a091 commit 2766698

File tree

2 files changed

+191
-5
lines changed

2 files changed

+191
-5
lines changed

markdown/extensions/md_in_html.py

Lines changed: 31 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -86,14 +86,22 @@ def get_state(self, tag, attrs):
8686
else: # pragma: no cover
8787
return None
8888

89+
def at_line_start(self):
90+
"""At line start."""
91+
92+
value = super().at_line_start()
93+
if not value and self.cleandoc and self.cleandoc[-1].endswith('\n'):
94+
value = True
95+
return value
96+
8997
def handle_starttag(self, tag, attrs):
9098
if tag in block_level_tags:
9199
# Valueless attr (ex: `<tag checked>`) results in `[('checked', None)]`.
92100
# Convert to `{'checked': 'checked'}`.
93101
attrs = {key: value if value is not None else key for key, value in attrs}
94102
state = self.get_state(tag, attrs)
95103

96-
if self.inraw or (state in [None, 'off'] and not self.mdstack):
104+
if self.inraw or (state in [None, 'off'] and not self.mdstack) or not self.at_line_start():
97105
# fall back to default behavior
98106
attrs.pop('markdown', None)
99107
super().handle_starttag(tag, attrs)
@@ -111,7 +119,10 @@ def handle_starttag(self, tag, attrs):
111119
super().handle_starttag(tag, attrs)
112120
else:
113121
text = self.get_starttag_text()
114-
self.handle_data(text)
122+
if self.mdstate and self.mdstate[-1] == "off":
123+
self.handle_data(self.md.htmlStash.store(text))
124+
else:
125+
self.handle_data(text)
115126

116127
def handle_endtag(self, tag):
117128
if tag in block_level_tags:
@@ -128,20 +139,32 @@ def handle_endtag(self, tag):
128139
if not self.mdstack:
129140
# Last item in stack is closed. Stash it
130141
element = self.get_element()
142+
# Get last entry to see if it ends in newlines
143+
# If it is an element, assume there are no newlines
144+
item = self.cleandoc[-1] if self.cleandoc else ''
145+
# If we only have one newline before block element, add another
146+
if not item.endswith('\n\n') and item.endswith('\n'):
147+
self.cleandoc.append('\n')
131148
self.cleandoc.append(self.md.htmlStash.store(element))
132149
self.cleandoc.append('\n\n')
133150
self.state = []
134151
else:
135152
# Treat orphan closing tag as a span level tag.
136153
text = self.get_endtag_text(tag)
137-
self.handle_data(text)
154+
if self.mdstate and self.mdstate[-1] == "off":
155+
self.handle_data(self.md.htmlStash.store(text))
156+
else:
157+
self.handle_data(text)
138158
else:
139159
# Span level tag
140160
if self.inraw:
141161
super().handle_endtag(tag)
142162
else:
143163
text = self.get_endtag_text(tag)
144-
self.handle_data(text)
164+
if self.mdstate and self.mdstate[-1] == "off":
165+
self.handle_data(self.md.htmlStash.store(text))
166+
else:
167+
self.handle_data(text)
145168

146169
def handle_data(self, data):
147170
if self.inraw or not self.mdstack:
@@ -156,7 +179,10 @@ def handle_empty_tag(self, data, is_block):
156179
if self.at_line_start() and is_block:
157180
self.handle_data('\n' + self.md.htmlStash.store(data) + '\n\n')
158181
else:
159-
self.handle_data(data)
182+
if self.mdstate and self.mdstate[-1] == "off":
183+
self.handle_data(self.md.htmlStash.store(data))
184+
else:
185+
self.handle_data(data)
160186

161187

162188
class HtmlBlockPreprocessor(Preprocessor):

tests/test_syntax/extensions/test_md_in_html.py

Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -390,6 +390,166 @@ def test_md1_nested_empty_block(self):
390390
)
391391
)
392392

393+
def test_orphan_end_tag_in_raw_html(self):
394+
self.assertMarkdownRenders(
395+
self.dedent(
396+
"""
397+
<div markdown="1">
398+
<div>
399+
Test
400+
401+
</pre>
402+
403+
Test
404+
</div>
405+
</div>
406+
"""
407+
),
408+
self.dedent(
409+
"""
410+
<div>
411+
<div>
412+
Test
413+
414+
</pre>
415+
416+
Test
417+
</div>
418+
</div>
419+
"""
420+
)
421+
)
422+
423+
def test_complex_nested_case(self):
424+
self.assertMarkdownRenders(
425+
self.dedent(
426+
"""
427+
<div markdown="1">
428+
**test**
429+
<div>
430+
**test**
431+
<img src=""/>
432+
<code>Test</code>
433+
<span>**test**</span>
434+
<p>Test 2</p>
435+
</div>
436+
</div>
437+
"""
438+
),
439+
self.dedent(
440+
"""
441+
<div>
442+
<p><strong>test</strong></p>
443+
<div>
444+
**test**
445+
<img src=""/>
446+
<code>Test</code>
447+
<span>**test**</span>
448+
<p>Test 2</p>
449+
</div>
450+
</div>
451+
"""
452+
)
453+
)
454+
455+
def test_complex_nested_case_whitespace(self):
456+
self.assertMarkdownRenders(
457+
self.dedent(
458+
"""
459+
Text with space\t
460+
<div markdown="1">\t
461+
\t
462+
<div>
463+
**test**
464+
<img src=""/>
465+
<code>Test</code>
466+
<span>**test**</span>
467+
<div>With whitespace</div>
468+
<p>Test 2</p>
469+
</div>
470+
**test**
471+
</div>
472+
"""
473+
),
474+
self.dedent(
475+
"""
476+
<p>Text with space </p>
477+
<div>
478+
<div>
479+
**test**
480+
<img src=""/>
481+
<code>Test</code>
482+
<span>**test**</span>
483+
<div>With whitespace</div>
484+
<p>Test 2</p>
485+
</div>
486+
<p><strong>test</strong></p>
487+
</div>
488+
"""
489+
)
490+
)
491+
492+
def test_md1_intail_md1(self):
493+
self.assertMarkdownRenders(
494+
'<div markdown="1">*foo*</div><div markdown="1">*bar*</div>',
495+
self.dedent(
496+
"""
497+
<div>
498+
<p><em>foo</em></p>
499+
</div>
500+
<div>
501+
<p><em>bar</em></p>
502+
</div>
503+
"""
504+
)
505+
)
506+
507+
def test_md1_no_blank_line_before(self):
508+
self.assertMarkdownRenders(
509+
self.dedent(
510+
"""
511+
A _Markdown_ paragraph with no blank line after.
512+
<div markdown="1">
513+
A _Markdown_ paragraph in an HTML block with no blank line before.
514+
</div>
515+
"""
516+
),
517+
self.dedent(
518+
"""
519+
<p>A <em>Markdown</em> paragraph with no blank line after.</p>
520+
<div>
521+
<p>A <em>Markdown</em> paragraph in an HTML block with no blank line before.</p>
522+
</div>
523+
"""
524+
)
525+
)
526+
527+
def test_md1_no_line_break(self):
528+
# The div here is parsed as a span-level element. Bad input equals bad output!
529+
self.assertMarkdownRenders(
530+
'A _Markdown_ paragraph with <div markdown="1">no _line break_.</div>',
531+
'<p>A <em>Markdown</em> paragraph with <div markdown="1">no <em>line break</em>.</div></p>'
532+
)
533+
534+
def test_md1_in_tail(self):
535+
self.assertMarkdownRenders(
536+
self.dedent(
537+
"""
538+
<div></div><div markdown="1">
539+
A _Markdown_ paragraph in an HTML block in tail of previous element.
540+
</div>
541+
"""
542+
),
543+
self.dedent(
544+
"""
545+
<div></div>
546+
<div>
547+
<p>A <em>Markdown</em> paragraph in an HTML block in tail of previous element.</p>
548+
</div>
549+
"""
550+
)
551+
)
552+
393553
def test_md_span_paragraph(self):
394554
self.assertMarkdownRenders(
395555
'<p markdown="span">*foo*</p>',

0 commit comments

Comments
 (0)