Skip to content

Commit 0c551c9

Browse files
committed
Make lxml tree-builder coerce comments to work with lxml 3.5.
1 parent dae03f6 commit 0c551c9

File tree

2 files changed: +8 -4 lines changed

2 files changed

+8
-4
lines changed

html5lib/ihatexml.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -225,6 +225,9 @@ def coerceComment(self, data):
225225
while "--" in data:
226226
warnings.warn("Comments cannot contain adjacent dashes", DataLossWarning)
227227
data = data.replace("--", "- -")
228+
if data.endswith("-"):
229+
warnings.warn("Comments cannot end in a dash", DataLossWarning)
230+
data += " "
228231
return data
229232

230233
def coerceCharacters(self, data):

html5lib/treebuilders/etree_lxml.py

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -54,7 +54,7 @@ def _getChildNodes(self):
5454
def testSerializer(element):
5555
rv = []
5656
finalText = None
57-
infosetFilter = ihatexml.InfosetFilter()
57+
infosetFilter = ihatexml.InfosetFilter(preventDoubleDashComments=True)
5858

5959
def serializeElement(element, indent=0):
6060
if not hasattr(element, "tag"):
@@ -189,7 +189,7 @@ class TreeBuilder(_base.TreeBuilder):
189189

190190
def __init__(self, namespaceHTMLElements, fullTree=False):
191191
builder = etree_builders.getETreeModule(etree, fullTree=fullTree)
192-
infosetFilter = self.infosetFilter = ihatexml.InfosetFilter()
192+
infosetFilter = self.infosetFilter = ihatexml.InfosetFilter(preventDoubleDashComments=True)
193193
self.namespaceHTMLElements = namespaceHTMLElements
194194

195195
class Attributes(dict):
@@ -257,7 +257,7 @@ def _getData(self):
257257
data = property(_getData, _setData)
258258

259259
self.elementClass = Element
260-
self.commentClass = builder.Comment
260+
self.commentClass = Comment
261261
# self.fragmentClass = builder.DocumentFragment
262262
_base.TreeBuilder.__init__(self, namespaceHTMLElements)
263263

@@ -344,7 +344,8 @@ def insertRoot(self, token):
344344

345345
# Append the initial comments:
346346
for comment_token in self.initial_comments:
347-
root.addprevious(etree.Comment(comment_token["data"]))
347+
comment = self.commentClass(comment_token["data"])
348+
root.addprevious(comment._element)
348349

349350
# Create the root document and add the ElementTree to it
350351
self.document = self.documentClass()

0 commit comments

Comments
 (0)