diff --git a/html5lib/html5parser.py b/html5lib/html5parser.py
index c2c30783..ae980c55 100644
--- a/html5lib/html5parser.py
+++ b/html5lib/html5parser.py
@@ -204,8 +204,8 @@ def mainLoop(self):
elif type == DoctypeToken:
new_token = phase.processDoctype(new_token)
- if (type == StartTagToken and token["selfClosing"]
- and not token["selfClosingAcknowledged"]):
+ if (type == StartTagToken and token["selfClosing"] and
+ not token["selfClosingAcknowledged"]):
self.parseError("non-void-element-with-trailing-solidus",
{"name": token["name"]})
@@ -517,77 +517,76 @@ def processDoctype(self, token):
if publicId != "":
publicId = publicId.translate(asciiUpper2Lower)
- if (not correct or token["name"] != "html"
- or publicId.startswith(
- ("+//silmaril//dtd html pro v0r11 19970101//",
- "-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
- "-//as//dtd html 3.0 aswedit + extensions//",
- "-//ietf//dtd html 2.0 level 1//",
- "-//ietf//dtd html 2.0 level 2//",
- "-//ietf//dtd html 2.0 strict level 1//",
- "-//ietf//dtd html 2.0 strict level 2//",
- "-//ietf//dtd html 2.0 strict//",
- "-//ietf//dtd html 2.0//",
- "-//ietf//dtd html 2.1e//",
- "-//ietf//dtd html 3.0//",
- "-//ietf//dtd html 3.2 final//",
- "-//ietf//dtd html 3.2//",
- "-//ietf//dtd html 3//",
- "-//ietf//dtd html level 0//",
- "-//ietf//dtd html level 1//",
- "-//ietf//dtd html level 2//",
- "-//ietf//dtd html level 3//",
- "-//ietf//dtd html strict level 0//",
- "-//ietf//dtd html strict level 1//",
- "-//ietf//dtd html strict level 2//",
- "-//ietf//dtd html strict level 3//",
- "-//ietf//dtd html strict//",
- "-//ietf//dtd html//",
- "-//metrius//dtd metrius presentational//",
- "-//microsoft//dtd internet explorer 2.0 html strict//",
- "-//microsoft//dtd internet explorer 2.0 html//",
- "-//microsoft//dtd internet explorer 2.0 tables//",
- "-//microsoft//dtd internet explorer 3.0 html strict//",
- "-//microsoft//dtd internet explorer 3.0 html//",
- "-//microsoft//dtd internet explorer 3.0 tables//",
- "-//netscape comm. corp.//dtd html//",
- "-//netscape comm. corp.//dtd strict html//",
- "-//o'reilly and associates//dtd html 2.0//",
- "-//o'reilly and associates//dtd html extended 1.0//",
- "-//o'reilly and associates//dtd html extended relaxed 1.0//",
- "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",
- "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",
- "-//spyglass//dtd html 2.0 extended//",
- "-//sq//dtd html 2.0 hotmetal + extensions//",
- "-//sun microsystems corp.//dtd hotjava html//",
- "-//sun microsystems corp.//dtd hotjava strict html//",
- "-//w3c//dtd html 3 1995-03-24//",
- "-//w3c//dtd html 3.2 draft//",
- "-//w3c//dtd html 3.2 final//",
- "-//w3c//dtd html 3.2//",
- "-//w3c//dtd html 3.2s draft//",
- "-//w3c//dtd html 4.0 frameset//",
- "-//w3c//dtd html 4.0 transitional//",
- "-//w3c//dtd html experimental 19960712//",
- "-//w3c//dtd html experimental 970421//",
- "-//w3c//dtd w3 html//",
- "-//w3o//dtd w3 html 3.0//",
- "-//webtechs//dtd mozilla html 2.0//",
- "-//webtechs//dtd mozilla html//"))
- or publicId in
- ("-//w3o//dtd w3 html strict 3.0//en//",
- "-/w3c/dtd html 4.0 transitional/en",
- "html")
- or publicId.startswith(
- ("-//w3c//dtd html 4.01 frameset//",
- "-//w3c//dtd html 4.01 transitional//")) and
- systemId is None
- or systemId and systemId.lower() == "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"):
+ if (not correct or token["name"] != "html" or
+ publicId.startswith(
+ ("+//silmaril//dtd html pro v0r11 19970101//",
+ "-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
+ "-//as//dtd html 3.0 aswedit + extensions//",
+ "-//ietf//dtd html 2.0 level 1//",
+ "-//ietf//dtd html 2.0 level 2//",
+ "-//ietf//dtd html 2.0 strict level 1//",
+ "-//ietf//dtd html 2.0 strict level 2//",
+ "-//ietf//dtd html 2.0 strict//",
+ "-//ietf//dtd html 2.0//",
+ "-//ietf//dtd html 2.1e//",
+ "-//ietf//dtd html 3.0//",
+ "-//ietf//dtd html 3.2 final//",
+ "-//ietf//dtd html 3.2//",
+ "-//ietf//dtd html 3//",
+ "-//ietf//dtd html level 0//",
+ "-//ietf//dtd html level 1//",
+ "-//ietf//dtd html level 2//",
+ "-//ietf//dtd html level 3//",
+ "-//ietf//dtd html strict level 0//",
+ "-//ietf//dtd html strict level 1//",
+ "-//ietf//dtd html strict level 2//",
+ "-//ietf//dtd html strict level 3//",
+ "-//ietf//dtd html strict//",
+ "-//ietf//dtd html//",
+ "-//metrius//dtd metrius presentational//",
+ "-//microsoft//dtd internet explorer 2.0 html strict//",
+ "-//microsoft//dtd internet explorer 2.0 html//",
+ "-//microsoft//dtd internet explorer 2.0 tables//",
+ "-//microsoft//dtd internet explorer 3.0 html strict//",
+ "-//microsoft//dtd internet explorer 3.0 html//",
+ "-//microsoft//dtd internet explorer 3.0 tables//",
+ "-//netscape comm. corp.//dtd html//",
+ "-//netscape comm. corp.//dtd strict html//",
+ "-//o'reilly and associates//dtd html 2.0//",
+ "-//o'reilly and associates//dtd html extended 1.0//",
+ "-//o'reilly and associates//dtd html extended relaxed 1.0//",
+ "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",
+ "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",
+ "-//spyglass//dtd html 2.0 extended//",
+ "-//sq//dtd html 2.0 hotmetal + extensions//",
+ "-//sun microsystems corp.//dtd hotjava html//",
+ "-//sun microsystems corp.//dtd hotjava strict html//",
+ "-//w3c//dtd html 3 1995-03-24//",
+ "-//w3c//dtd html 3.2 draft//",
+ "-//w3c//dtd html 3.2 final//",
+ "-//w3c//dtd html 3.2//",
+ "-//w3c//dtd html 3.2s draft//",
+ "-//w3c//dtd html 4.0 frameset//",
+ "-//w3c//dtd html 4.0 transitional//",
+ "-//w3c//dtd html experimental 19960712//",
+ "-//w3c//dtd html experimental 970421//",
+ "-//w3c//dtd w3 html//",
+ "-//w3o//dtd w3 html 3.0//",
+ "-//webtechs//dtd mozilla html 2.0//",
+ "-//webtechs//dtd mozilla html//")) or
+ publicId in ("-//w3o//dtd w3 html strict 3.0//en//",
+ "-/w3c/dtd html 4.0 transitional/en",
+ "html") or
+ publicId.startswith(
+ ("-//w3c//dtd html 4.01 frameset//",
+ "-//w3c//dtd html 4.01 transitional//")) and
+ systemId is None or
+ systemId and systemId.lower() == "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"):
self.parser.compatMode = "quirks"
elif (publicId.startswith(
("-//w3c//dtd xhtml 1.0 frameset//",
- "-//w3c//dtd xhtml 1.0 transitional//"))
- or publicId.startswith(
+ "-//w3c//dtd xhtml 1.0 transitional//")) or
+ publicId.startswith(
("-//w3c//dtd html 4.01 frameset//",
"-//w3c//dtd html 4.01 transitional//")) and
systemId is not None):
@@ -988,8 +987,8 @@ def processSpaceCharactersDropNewline(self, token):
data = token["data"]
self.processSpaceCharacters = self.processSpaceCharactersNonPre
if (data.startswith("\n") and
- self.tree.openElements[-1].name in ("pre", "listing", "textarea")
- and not self.tree.openElements[-1].hasContent()):
+ self.tree.openElements[-1].name in ("pre", "listing", "textarea") and
+ not self.tree.openElements[-1].hasContent()):
data = data[1:]
if data:
self.tree.reconstructActiveFormattingElements()
@@ -1016,8 +1015,8 @@ def startTagProcessInHead(self, token):
def startTagBody(self, token):
self.parser.parseError("unexpected-start-tag", {"name": "body"})
- if (len(self.tree.openElements) == 1
- or self.tree.openElements[1].name != "body"):
+ if (len(self.tree.openElements) == 1 or
+ self.tree.openElements[1].name != "body"):
assert self.parser.innerHTML
else:
self.parser.framesetOK = False
diff --git a/html5lib/serializer/htmlserializer.py b/html5lib/serializer/htmlserializer.py
index be4d6344..b87d9a75 100644
--- a/html5lib/serializer/htmlserializer.py
+++ b/html5lib/serializer/htmlserializer.py
@@ -237,8 +237,8 @@ def serialize(self, treewalker, encoding=None):
yield self.encodeStrict(k)
if not self.minimize_boolean_attributes or \
- (k not in booleanAttributes.get(name, tuple())
- and k not in booleanAttributes.get("", tuple())):
+ (k not in booleanAttributes.get(name, tuple()) and
+ k not in booleanAttributes.get("", tuple())):
yield self.encodeStrict("=")
if self.quote_attr_values or not v:
quote_attr = True
diff --git a/html5lib/tests/mockParser.py b/html5lib/tests/mockParser.py
deleted file mode 100644
index ef31527e..00000000
--- a/html5lib/tests/mockParser.py
+++ /dev/null
@@ -1,41 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-import sys
-import os
-
-if __name__ == '__main__':
- # Allow us to import from the src directory
- os.chdir(os.path.split(os.path.abspath(__file__))[0])
- sys.path.insert(0, os.path.abspath(os.path.join(os.pardir, "src")))
-
-from html5lib.tokenizer import HTMLTokenizer
-
-
-class HTMLParser(object):
- """ Fake parser to test tokenizer output """
- def parse(self, stream, output=True):
- tokenizer = HTMLTokenizer(stream)
- for token in tokenizer:
- if output:
- print(token)
-
-if __name__ == "__main__":
- x = HTMLParser()
- if len(sys.argv) > 1:
- if len(sys.argv) > 2:
- import hotshot
- import hotshot.stats
- prof = hotshot.Profile('stats.prof')
- prof.runcall(x.parse, sys.argv[1], False)
- prof.close()
- stats = hotshot.stats.load('stats.prof')
- stats.strip_dirs()
- stats.sort_stats('time')
- stats.print_stats()
- else:
- x.parse(sys.argv[1])
- else:
- print("""Usage: python mockParser.py filename [stats]
- If stats is specified the hotshots profiler will run and output the
- stats instead.
- """)
diff --git a/html5lib/tests/performance/concatenation.py b/html5lib/tests/performance/concatenation.py
deleted file mode 100644
index a1465036..00000000
--- a/html5lib/tests/performance/concatenation.py
+++ /dev/null
@@ -1,36 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
-
-
-def f1():
- x = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
- y = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
- z = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
- x += y + z
-
-
-def f2():
- x = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
- y = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
- z = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
- x = x + y + z
-
-
-def f3():
- x = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
- y = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
- z = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
- x = "".join((x, y, z))
-
-
-def f4():
- x = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
- y = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
- z = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
- x = "%s%s%s" % (x, y, z)
-
-import timeit
-for x in range(4):
- statement = "f%s" % (x + 1)
- t = timeit.Timer(statement, "from __main__ import " + statement)
- r = t.repeat(3, 1000000)
- print(r, min(r))
diff --git a/html5lib/tests/test_serializer.py b/html5lib/tests/test_serializer.py
index 3c37feff..af76075e 100644
--- a/html5lib/tests/test_serializer.py
+++ b/html5lib/tests/test_serializer.py
@@ -91,8 +91,7 @@ def runSerializerTest(input, expected, options):
encoding = options.get("encoding", None)
if encoding:
- encode = lambda x: x.encode(encoding)
- expected = list(map(encode, expected))
+ expected = list(map(lambda x: x.encode(encoding), expected))
result = serialize_html(input, options)
if len(expected) == 1:
diff --git a/html5lib/tests/test_tokenizer.py b/html5lib/tests/test_tokenizer.py
index 823c6ea6..87e098f3 100644
--- a/html5lib/tests/test_tokenizer.py
+++ b/html5lib/tests/test_tokenizer.py
@@ -98,8 +98,8 @@ def tokensMatch(expectedTokens, receivedTokens, ignoreErrorOrder,
"""
checkSelfClosing = False
for token in expectedTokens:
- if (token[0] == "StartTag" and len(token) == 4
- or token[0] == "EndTag" and len(token) == 3):
+ if (token[0] == "StartTag" and len(token) == 4 or
+ token[0] == "EndTag" and len(token) == 3):
checkSelfClosing = True
break
diff --git a/html5lib/treebuilders/_base.py b/html5lib/treebuilders/_base.py
index 8b97cc11..8196f591 100644
--- a/html5lib/treebuilders/_base.py
+++ b/html5lib/treebuilders/_base.py
@@ -353,8 +353,8 @@ def getTableMisnestedNodePosition(self):
def generateImpliedEndTags(self, exclude=None):
name = self.openElements[-1].name
# XXX td, th and tr are not actually needed
- if (name in frozenset(("dd", "dt", "li", "option", "optgroup", "p", "rp", "rt"))
- and name != exclude):
+ if (name in frozenset(("dd", "dt", "li", "option", "optgroup", "p", "rp", "rt")) and
+ name != exclude):
self.openElements.pop()
# XXX This is not entirely what the specification says. We should
# investigate it more closely.
diff --git a/html5lib/treebuilders/dom.py b/html5lib/treebuilders/dom.py
index 234233b7..8656244f 100644
--- a/html5lib/treebuilders/dom.py
+++ b/html5lib/treebuilders/dom.py
@@ -47,8 +47,8 @@ def __init__(self, element):
_base.Node.__init__(self, element.nodeName)
self.element = element
- namespace = property(lambda self: hasattr(self.element, "namespaceURI")
- and self.element.namespaceURI or None)
+ namespace = property(lambda self: hasattr(self.element, "namespaceURI") and
+ self.element.namespaceURI or None)
def appendChild(self, node):
node.parent = self
diff --git a/html5lib/treewalkers/__init__.py b/html5lib/treewalkers/__init__.py
index 21f46b01..00ae2804 100644
--- a/html5lib/treewalkers/__init__.py
+++ b/html5lib/treewalkers/__init__.py
@@ -10,11 +10,11 @@
from __future__ import absolute_import, division, unicode_literals
-__all__ = ["getTreeWalker", "pprint", "dom", "etree", "genshistream", "lxmletree"]
-
from .. import constants
from ..utils import default_etree
+__all__ = ["getTreeWalker", "pprint", "dom", "etree", "genshistream", "lxmletree"]
+
treeWalkerCache = {}
diff --git a/html5lib/treewalkers/_base.py b/html5lib/treewalkers/_base.py
index 4e11cd02..e79a4357 100644
--- a/html5lib/treewalkers/_base.py
+++ b/html5lib/treewalkers/_base.py
@@ -1,11 +1,12 @@
from __future__ import absolute_import, division, unicode_literals
from six import text_type, string_types
+from xml.dom import Node
+from ..constants import voidElements, spaceCharacters
+
__all__ = ["DOCUMENT", "DOCTYPE", "TEXT", "ELEMENT", "COMMENT", "ENTITY", "UNKNOWN",
"TreeWalker", "NonRecursiveTreeWalker"]
-from xml.dom import Node
-
DOCUMENT = Node.DOCUMENT_NODE
DOCTYPE = Node.DOCUMENT_TYPE_NODE
TEXT = Node.TEXT_NODE
@@ -14,7 +15,6 @@
ENTITY = Node.ENTITY_NODE
UNKNOWN = "<#UNKNOWN#>"
-from ..constants import voidElements, spaceCharacters
spaceCharacters = "".join(spaceCharacters)
diff --git a/html5lib/treewalkers/genshistream.py b/html5lib/treewalkers/genshistream.py
index f559c45d..24d33282 100644
--- a/html5lib/treewalkers/genshistream.py
+++ b/html5lib/treewalkers/genshistream.py
@@ -39,8 +39,8 @@ def tokens(self, event, next):
if namespace == namespaces["html"] and name in voidElements:
for token in self.emptyTag(namespace, name, converted_attribs,
- not next or next[0] != END
- or next[1] != tag):
+ not next or next[0] != END or
+ next[1] != tag):
yield token
else:
yield self.startTag(namespace, name, converted_attribs)