Skip to content

Commit 81cc5b8

Browse files
authored
Properly parse code spans in md_in_html (#1069)
This reverts part of 2766698 and re-implements handling of tails in the same manner as the core. Also, ensure line_offset doesn't raise an error on bad input (see #1066) and properly handle script tags in code spans (same as in the core). Fixes #1068.
1 parent 447da66 commit 81cc5b8

File tree

4 files changed

+92
-15
lines changed

4 files changed

+92
-15
lines changed

docs/change_log/index.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,10 @@ title: Change Log
33
Python-Markdown Change Log
44
=========================
55

6+
Under development: version 3.3.4 (a bug-fix release).
7+
8+
* Properly parse code spans in md_in_html (#1069).
9+
610
Oct 25, 2020: version 3.3.3 (a bug-fix release).
711

812
* Unify all block-level tags (#1047).

markdown/extensions/md_in_html.py

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
from ..preprocessors import Preprocessor
2020
from ..postprocessors import RawHtmlPostprocessor
2121
from .. import util
22-
from ..htmlparser import HTMLExtractor
22+
from ..htmlparser import HTMLExtractor, blank_line_re
2323
import xml.etree.ElementTree as etree
2424

2525

@@ -85,17 +85,9 @@ def get_state(self, tag, attrs):
8585
else: # pragma: no cover
8686
return None
8787

88-
def at_line_start(self):
89-
"""At line start."""
90-
91-
value = super().at_line_start()
92-
if not value and self.cleandoc and self.cleandoc[-1].endswith('\n'):
93-
value = True
94-
return value
95-
9688
def handle_starttag(self, tag, attrs):
9789
# Handle tags that should always be empty and do not specify a closing tag
98-
if tag in self.empty_tags:
90+
if tag in self.empty_tags and (self.at_line_start() or self.intail):
9991
attrs = {key: value if value is not None else key for key, value in attrs}
10092
if "markdown" in attrs:
10193
attrs.pop('markdown')
@@ -106,13 +98,12 @@ def handle_starttag(self, tag, attrs):
10698
self.handle_empty_tag(data, True)
10799
return
108100

109-
if tag in self.block_level_tags:
101+
if tag in self.block_level_tags and (self.at_line_start() or self.intail):
110102
# Valueless attr (ex: `<tag checked>`) results in `[('checked', None)]`.
111103
# Convert to `{'checked': 'checked'}`.
112104
attrs = {key: value if value is not None else key for key, value in attrs}
113105
state = self.get_state(tag, attrs)
114-
115-
if self.inraw or (state in [None, 'off'] and not self.mdstack) or not self.at_line_start():
106+
if self.inraw or (state in [None, 'off'] and not self.mdstack):
116107
# fall back to default behavior
117108
attrs.pop('markdown', None)
118109
super().handle_starttag(tag, attrs)
@@ -134,6 +125,9 @@ def handle_starttag(self, tag, attrs):
134125
self.handle_data(self.md.htmlStash.store(text))
135126
else:
136127
self.handle_data(text)
128+
if tag in self.CDATA_CONTENT_ELEMENTS:
129+
# This is presumably a standalone tag in a code span (see #1036).
130+
self.clear_cdata_mode()
137131

138132
def handle_endtag(self, tag):
139133
if tag in self.block_level_tags:
@@ -159,6 +153,11 @@ def handle_endtag(self, tag):
159153
self.cleandoc.append(self.md.htmlStash.store(element))
160154
self.cleandoc.append('\n\n')
161155
self.state = []
156+
# Check if element has a tail
157+
if not blank_line_re.match(
158+
self.rawdata[self.line_offset + self.offset + len(self.get_endtag_text(tag)):]):
159+
# More content exists after endtag.
160+
self.intail = True
162161
else:
163162
# Treat orphan closing tag as a span level tag.
164163
text = self.get_endtag_text(tag)
@@ -191,6 +190,8 @@ def handle_startendtag(self, tag, attrs):
191190
self.handle_empty_tag(data, is_block=self.md.is_block_level(tag))
192191

193192
def handle_data(self, data):
193+
if self.intail and '\n' in data:
194+
self.intail = False
194195
if self.inraw or not self.mdstack:
195196
super().handle_data(data)
196197
else:

markdown/htmlparser.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -91,8 +91,14 @@ def close(self):
9191
@property
9292
def line_offset(self):
9393
"""Returns char index in self.rawdata for the start of the current line. """
94-
if self.lineno > 1:
95-
return re.match(r'([^\n]*\n){{{}}}'.format(self.lineno-1), self.rawdata).end()
94+
if self.lineno > 1 and '\n' in self.rawdata:
95+
m = re.match(r'([^\n]*\n){{{}}}'.format(self.lineno-1), self.rawdata)
96+
if m:
97+
return m.end()
98+
else: # pragma: no cover
99+
# Value of self.lineno must exceed total number of lines.
100+
# Find index of beginning of last line.
101+
return self.rawdata.rfind('\n')
96102
return 0
97103

98104
def at_line_start(self):

tests/test_syntax/extensions/test_md_in_html.py

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,72 @@ def test_md1_div_linebreaks(self):
126126
)
127127
)
128128

129+
def test_md1_code_span(self):
130+
self.assertMarkdownRenders(
131+
self.dedent(
132+
"""
133+
<div markdown="1">
134+
`<h1>code span</h1>`
135+
</div>
136+
"""
137+
),
138+
self.dedent(
139+
"""
140+
<div>
141+
<p><code>&lt;h1&gt;code span&lt;/h1&gt;</code></p>
142+
</div>
143+
"""
144+
)
145+
)
146+
147+
def test_md1_code_span_oneline(self):
148+
self.assertMarkdownRenders(
149+
'<div markdown="1">`<h1>code span</h1>`</div>',
150+
self.dedent(
151+
"""
152+
<div>
153+
<p><code>&lt;h1&gt;code span&lt;/h1&gt;</code></p>
154+
</div>
155+
"""
156+
)
157+
)
158+
159+
def test_md1_code_span_unclosed(self):
160+
self.assertMarkdownRenders(
161+
self.dedent(
162+
"""
163+
<div markdown="1">
164+
`<p>`
165+
</div>
166+
"""
167+
),
168+
self.dedent(
169+
"""
170+
<div>
171+
<p><code>&lt;p&gt;</code></p>
172+
</div>
173+
"""
174+
)
175+
)
176+
177+
def test_md1_code_span_script_tag(self):
178+
self.assertMarkdownRenders(
179+
self.dedent(
180+
"""
181+
<div markdown="1">
182+
`<script>`
183+
</div>
184+
"""
185+
),
186+
self.dedent(
187+
"""
188+
<div>
189+
<p><code>&lt;script&gt;</code></p>
190+
</div>
191+
"""
192+
)
193+
)
194+
129195
def test_md1_div_blank_lines(self):
130196
self.assertMarkdownRenders(
131197
self.dedent(

0 commit comments

Comments
 (0)