From 43dec3b98a51e5fd5e0ae9ba5a221b8dffb80be3 Mon Sep 17 00:00:00 2001
From: Archmonger <16909269+Archmonger@users.noreply.github.com>
Date: Thu, 4 Aug 2022 01:33:21 -0700
Subject: [PATCH 01/53] LXML based html_to_vdom
---
requirements/pkg-deps.txt | 1 +
src/idom/utils.py | 158 ++++++++++++++++----------------------
2 files changed, 66 insertions(+), 93 deletions(-)
diff --git a/requirements/pkg-deps.txt b/requirements/pkg-deps.txt
index f13b33bf9..5e4835f12 100644
--- a/requirements/pkg-deps.txt
+++ b/requirements/pkg-deps.txt
@@ -6,3 +6,4 @@ fastjsonschema >=2.14.5
requests >=2
colorlog >=6
asgiref >=3
+lxml >= 4
diff --git a/src/idom/utils.py b/src/idom/utils.py
index e8f9cfd01..3d30a7ba4 100644
--- a/src/idom/utils.py
+++ b/src/idom/utils.py
@@ -1,8 +1,11 @@
-from html.parser import HTMLParser as _HTMLParser
-from typing import Any, Callable, Dict, Generic, List, Optional, Tuple, TypeVar
+from typing import Any, Callable, Dict, Generic, TypeVar, Union
+
+from lxml import etree
+from lxml.html import fragment_fromstring
_RefValue = TypeVar("_RefValue")
+_ModelTransform = Callable[[Dict[str, Any]], Any]
_UNDEFINED: Any = object()
@@ -49,94 +52,63 @@ def __repr__(self) -> str:
return f"{type(self).__name__}({current})"
-_ModelTransform = Callable[[Dict[str, Any]], Any]
-
-
-def html_to_vdom(source: str, *transforms: _ModelTransform) -> Dict[str, Any]:
- """Transform HTML into a DOM model
-
- Parameters:
- source:
- The raw HTML as a string
- transforms:
- Functions of the form ``transform(old) -> new`` where ``old`` is a VDOM
- dictionary which will be replaced by ``new``. For example, you could use a
- transform function to add highlighting to a ``<pre><code>`` block.
- """
- parser = HtmlParser()
- parser.feed(source)
- root = parser.model()
- to_visit = [root]
- while to_visit:
- node = to_visit.pop(0)
- if isinstance(node, dict) and "children" in node:
- transformed = []
- for child in node["children"]:
- if isinstance(child, dict):
- for t in transforms:
- child = t(child)
- if child is not None:
- transformed.append(child)
- to_visit.append(child)
- node["children"] = transformed
- if "attributes" in node and not node["attributes"]:
- del node["attributes"]
- if "children" in node and not node["children"]:
- del node["children"]
- return root
-
-
-class HtmlParser(_HTMLParser):
- """HTML to VDOM parser
-
- Example:
-
- .. code-block::
-
- parser = HtmlParser()
-
- parser.feed(an_html_string)
- parser.feed(another_html_string)
- ...
-
- vdom = parser.model()
- """
-
- def model(self) -> Dict[str, Any]:
- """Get the current state of parsed VDOM model"""
- return self._node_stack[0]
-
- def feed(self, data: str) -> None:
- """Feed in HTML that will update the :meth:`HtmlParser.model`"""
- self._node_stack.append(self._make_vdom("div", {}))
- super().feed(data)
-
- def reset(self) -> None:
- """Reset the state of the parser"""
- self._node_stack: List[Dict[str, Any]] = []
- super().reset()
-
- def handle_starttag(self, tag: str, attrs: List[Tuple[str, Optional[str]]]) -> None:
- new = self._make_vdom(tag, dict(attrs))
- current = self._node_stack[-1]
- current["children"].append(new)
- self._node_stack.append(new)
-
- def handle_endtag(self, tag: str) -> None:
- del self._node_stack[-1]
-
- def handle_data(self, data: str) -> None:
- self._node_stack[-1]["children"].append(data)
-
- @staticmethod
- def _make_vdom(tag: str, attrs: Dict[str, Any]) -> Dict[str, Any]:
- if "style" in attrs:
- style = attrs["style"]
- if isinstance(style, str):
- style_dict = {}
- for k, v in (part.split(":", 1) for part in style.split(";") if part):
- title_case_key = k.title().replace("-", "")
- camel_case_key = title_case_key[:1].lower() + title_case_key[1:]
- style_dict[camel_case_key] = v
- attrs["style"] = style_dict
- return {"tagName": tag, "attributes": attrs, "children": []}
+def _set_if_val_exists(object, key, value):
+ """Sets a key on a dictionary if the value's length is greater than 0."""
+ if len(value):
+ object[key] = value
+
+
+def _vdom_attributes(object):
+ if "attributes" in object and "style" in object["attributes"]:
+ style = object["attributes"]["style"]
+ if isinstance(style, str):
+ style_dict = {}
+ for k, v in (part.split(":", 1) for part in style.split(";") if part):
+ title_case_key = k.title().replace("-", "")
+ camel_case_key = title_case_key[:1].lower() + title_case_key[1:]
+ style_dict[camel_case_key] = v
+ object["attributes"]["style"] = style_dict
+
+
+def _vdom_key(object):
+ if object["tagName"] == "script":
+ if not isinstance(object["children"][0], str):
+ # The script tag contents should be the first child
+ raise TypeError("Could not find script tag contents!")
+ if object["children"][0]:
+ object["key"] = object["children"][0]
+
+
+def html_to_vdom(html: Union[str, etree._Element], *transforms: _ModelTransform):
+ """Convert an lxml.etree node tree into a VDOM dict."""
+ # Keep track of whether this is the root node
+ root_node = False
+
+ # If the user provided a string, convert it to an lxml.etree node.
+ if isinstance(html, str):
+ parser = etree.HTMLParser()
+ node = fragment_fromstring(html, create_parent=True, parser=parser)
+ root_node = True
+ elif isinstance(html, etree._Element):
+ node = html
+ else:
+ raise TypeError("html_to_vdom expects a string or lxml.etree._Element")
+
+ # Convert the lxml.etree node to a VDOM dict.
+ vdom = {"tagName": node.tag}
+ node_children = [node.text] if node.text else []
+ node_children.extend([html_to_vdom(child) for child in node.iterchildren(None)])
+ _set_if_val_exists(vdom, "children", node_children)
+ _set_if_val_exists(vdom, "attributes", dict(node.items()))
+ _vdom_attributes(vdom)
+ _vdom_key(vdom)
+
+ # Apply any provided transforms.
+ for transform in transforms:
+ vdom = transform(vdom)
+
+ # The root node is always a React Fragment
+ if root_node:
+ vdom["tagName"] = ""
+
+ return vdom
From f0a3220ebda1c44bd7afa515634d43b11c1e1665 Mon Sep 17 00:00:00 2001
From: Archmonger <16909269+Archmonger@users.noreply.github.com>
Date: Thu, 4 Aug 2022 01:55:48 -0700
Subject: [PATCH 02/53] better interface for html_to_vdom
---
src/idom/utils.py | 138 ++++++++++++++++++++++++++--------------------
1 file changed, 78 insertions(+), 60 deletions(-)
diff --git a/src/idom/utils.py b/src/idom/utils.py
index 3d30a7ba4..66b15cfed 100644
--- a/src/idom/utils.py
+++ b/src/idom/utils.py
@@ -52,63 +52,81 @@ def __repr__(self) -> str:
return f"{type(self).__name__}({current})"
-def _set_if_val_exists(object, key, value):
- """Sets a key on a dictionary if the value's length is greater than 0."""
- if len(value):
- object[key] = value
-
-
-def _vdom_attributes(object):
- if "attributes" in object and "style" in object["attributes"]:
- style = object["attributes"]["style"]
- if isinstance(style, str):
- style_dict = {}
- for k, v in (part.split(":", 1) for part in style.split(";") if part):
- title_case_key = k.title().replace("-", "")
- camel_case_key = title_case_key[:1].lower() + title_case_key[1:]
- style_dict[camel_case_key] = v
- object["attributes"]["style"] = style_dict
-
-
-def _vdom_key(object):
- if object["tagName"] == "script":
- if not isinstance(object["children"][0], str):
- # The script tag contents should be the first child
- raise TypeError("Could not find script tag contents!")
- if object["children"][0]:
- object["key"] = object["children"][0]
-
-
-def html_to_vdom(html: Union[str, etree._Element], *transforms: _ModelTransform):
- """Convert an lxml.etree node tree into a VDOM dict."""
- # Keep track of whether this is the root node
- root_node = False
-
- # If the user provided a string, convert it to an lxml.etree node.
- if isinstance(html, str):
- parser = etree.HTMLParser()
- node = fragment_fromstring(html, create_parent=True, parser=parser)
- root_node = True
- elif isinstance(html, etree._Element):
- node = html
- else:
- raise TypeError("html_to_vdom expects a string or lxml.etree._Element")
-
- # Convert the lxml.etree node to a VDOM dict.
- vdom = {"tagName": node.tag}
- node_children = [node.text] if node.text else []
- node_children.extend([html_to_vdom(child) for child in node.iterchildren(None)])
- _set_if_val_exists(vdom, "children", node_children)
- _set_if_val_exists(vdom, "attributes", dict(node.items()))
- _vdom_attributes(vdom)
- _vdom_key(vdom)
-
- # Apply any provided transforms.
- for transform in transforms:
- vdom = transform(vdom)
-
- # The root node is always a React Fragment
- if root_node:
- vdom["tagName"] = ""
-
- return vdom
+def html_to_vdom(html: str, *transforms: _ModelTransform):
+ """Transform HTML into a DOM model
+ Parameters:
+ source:
+ The raw HTML as a string
+ transforms:
+ Functions of the form ``transform(old) -> new`` where ``old`` is a VDOM
+ dictionary which will be replaced by ``new``. For example, you could use a
+ transform function to add highlighting to a ``<pre><code>`` block.
+ """
+
+ if not isinstance(html, str):
+ raise TypeError("html_to_vdom expects a string!")
+
+ return HtmlToVdom().convert(html, *transforms)
+
+
+class HtmlToVdom:
+ def convert(self, html: Union[str, etree._Element], *transforms: _ModelTransform):
+ """Convert an lxml.etree node tree into a VDOM dict."""
+ # Keep track of whether this is the root node
+ root_node = False
+
+ # If the user provided a string, convert it to an lxml.etree node.
+ if isinstance(html, str):
+ parser = etree.HTMLParser()
+ node = fragment_fromstring(html, create_parent=True, parser=parser)
+ root_node = True
+ elif isinstance(html, etree._Element):
+ node = html
+ else:
+ raise TypeError("html_to_vdom expects a string or lxml.etree._Element")
+
+ # Recursively convert the lxml.etree node to a VDOM dict.
+ vdom = {"tagName": node.tag}
+ node_children = [node.text] if node.text else []
+ node_children.extend([self.convert(child) for child in node.iterchildren(None)])
+ self._set_if_val_exists(vdom, "children", node_children)
+ self._set_if_val_exists(vdom, "attributes", dict(node.items()))
+ self._vdom_attributes(vdom)
+ self._vdom_key(vdom)
+
+ # Apply any provided transforms.
+ for transform in transforms:
+ vdom = transform(vdom)
+
+ # The root node is always a React Fragment
+ if root_node:
+ vdom["tagName"] = ""
+
+ return vdom
+
+ @staticmethod
+ def _set_if_val_exists(object, key, value):
+ """Sets a key on a dictionary if the value's length is greater than 0."""
+ if len(value):
+ object[key] = value
+
+ @staticmethod
+ def _vdom_attributes(object):
+ if "attributes" in object and "style" in object["attributes"]:
+ style = object["attributes"]["style"]
+ if isinstance(style, str):
+ style_dict = {}
+ for k, v in (part.split(":", 1) for part in style.split(";") if part):
+ title_case_key = k.title().replace("-", "")
+ camel_case_key = title_case_key[:1].lower() + title_case_key[1:]
+ style_dict[camel_case_key] = v
+ object["attributes"]["style"] = style_dict
+
+ @staticmethod
+ def _vdom_key(object):
+ if object["tagName"] == "script":
+ if not isinstance(object["children"][0], str):
+ # The script tag contents should be the first child
+ raise TypeError("Could not find script tag contents!")
+ if object["children"][0]:
+ object["key"] = object["children"][0]
From c6ad8bfe48abe83b2357db2d25208963dfe21e5f Mon Sep 17 00:00:00 2001
From: Archmonger <16909269+Archmonger@users.noreply.github.com>
Date: Thu, 4 Aug 2022 02:06:42 -0700
Subject: [PATCH 03/53] cleanup typehints and exceptions
---
src/idom/utils.py | 16 +++++++++-------
1 file changed, 9 insertions(+), 7 deletions(-)
diff --git a/src/idom/utils.py b/src/idom/utils.py
index 66b15cfed..88fb5fc22 100644
--- a/src/idom/utils.py
+++ b/src/idom/utils.py
@@ -66,10 +66,10 @@ def html_to_vdom(html: str, *transforms: _ModelTransform):
if not isinstance(html, str):
raise TypeError("html_to_vdom expects a string!")
- return HtmlToVdom().convert(html, *transforms)
+ return _HtmlToVdom().convert(html, *transforms)
-class HtmlToVdom:
+class _HtmlToVdom:
def convert(self, html: Union[str, etree._Element], *transforms: _ModelTransform):
"""Convert an lxml.etree node tree into a VDOM dict."""
# Keep track of whether this is the root node
@@ -83,7 +83,9 @@ def convert(self, html: Union[str, etree._Element], *transforms: _ModelTransform
elif isinstance(html, etree._Element):
node = html
else:
- raise TypeError("html_to_vdom expects a string or lxml.etree._Element")
+ raise TypeError(
+ f"HtmlToVdom encountered unsupported type {type(html)} from {html}"
+ )
# Recursively convert the lxml.etree node to a VDOM dict.
vdom = {"tagName": node.tag}
@@ -105,13 +107,13 @@ def convert(self, html: Union[str, etree._Element], *transforms: _ModelTransform
return vdom
@staticmethod
- def _set_if_val_exists(object, key, value):
+ def _set_if_val_exists(object: Dict, key: str, value: Any):
"""Sets a key on a dictionary if the value's length is greater than 0."""
if len(value):
object[key] = value
@staticmethod
- def _vdom_attributes(object):
+ def _vdom_attributes(object: Dict):
if "attributes" in object and "style" in object["attributes"]:
style = object["attributes"]["style"]
if isinstance(style, str):
@@ -126,7 +128,7 @@ def _vdom_attributes(object):
def _vdom_key(object):
if object["tagName"] == "script":
if not isinstance(object["children"][0], str):
- # The script tag contents should be the first child
- raise TypeError("Could not find script tag contents!")
+ # The script's source should always be the first child
+ raise LookupError("Could not find script's contents!")
if object["children"][0]:
object["key"] = object["children"][0]
From a2d995a6cb2b074c7f831826552b6417170ca61e Mon Sep 17 00:00:00 2001
From: Archmonger <16909269+Archmonger@users.noreply.github.com>
Date: Thu, 4 Aug 2022 03:43:57 -0700
Subject: [PATCH 04/53] variable and function name cleanup
---
src/idom/utils.py | 39 ++++++++++++++++++++-------------------
1 file changed, 20 insertions(+), 19 deletions(-)
diff --git a/src/idom/utils.py b/src/idom/utils.py
index 88fb5fc22..99f17fc53 100644
--- a/src/idom/utils.py
+++ b/src/idom/utils.py
@@ -1,4 +1,4 @@
-from typing import Any, Callable, Dict, Generic, TypeVar, Union
+from typing import Any, Callable, Dict, Generic, List, TypeVar, Union
from lxml import etree
from lxml.html import fragment_fromstring
@@ -87,14 +87,13 @@ def convert(self, html: Union[str, etree._Element], *transforms: _ModelTransform
f"HtmlToVdom encountered unsupported type {type(html)} from {html}"
)
- # Recursively convert the lxml.etree node to a VDOM dict.
+ # Recursively convert the lxml node to a VDOM dict.
vdom = {"tagName": node.tag}
node_children = [node.text] if node.text else []
node_children.extend([self.convert(child) for child in node.iterchildren(None)])
- self._set_if_val_exists(vdom, "children", node_children)
- self._set_if_val_exists(vdom, "attributes", dict(node.items()))
- self._vdom_attributes(vdom)
- self._vdom_key(vdom)
+ self._set_key_value(vdom, "children", node_children)
+ self._set_key_value(vdom, "attributes", dict(node.items()))
+ self._vdom_mutations(vdom)
# Apply any provided transforms.
for transform in transforms:
@@ -107,28 +106,30 @@ def convert(self, html: Union[str, etree._Element], *transforms: _ModelTransform
return vdom
@staticmethod
- def _set_if_val_exists(object: Dict, key: str, value: Any):
- """Sets a key on a dictionary if the value's length is greater than 0."""
+ def _set_key_value(vdom: Dict, key: str, value: Union[Dict, List]):
+ """Sets a key/value on a dictionary only if the iterable value's length is greater than 0."""
if len(value):
- object[key] = value
+ vdom[key] = value
@staticmethod
- def _vdom_attributes(object: Dict):
- if "attributes" in object and "style" in object["attributes"]:
- style = object["attributes"]["style"]
+ def _vdom_mutations(vdom: Dict):
+ """Performs any necessary mutations on the VDOM attributes to meet VDOM spec
+ and/or to make elements properly renderable in React."""
+ # Convert style attributes to VDOM spec
+ if "attributes" in vdom and "style" in vdom["attributes"]:
+ style = vdom["attributes"]["style"]
if isinstance(style, str):
style_dict = {}
for k, v in (part.split(":", 1) for part in style.split(";") if part):
title_case_key = k.title().replace("-", "")
camel_case_key = title_case_key[:1].lower() + title_case_key[1:]
style_dict[camel_case_key] = v
- object["attributes"]["style"] = style_dict
+ vdom["attributes"]["style"] = style_dict
- @staticmethod
- def _vdom_key(object):
- if object["tagName"] == "script":
- if not isinstance(object["children"][0], str):
+ # Set key attribute for scripts to prevent re-execution during re-renders
+ if vdom["tagName"] == "script":
+ if not isinstance(vdom["children"][0], str):
# The script's source should always be the first child
raise LookupError("Could not find script's contents!")
- if object["children"][0]:
- object["key"] = object["children"][0]
+ if vdom["children"][0]:
+ vdom["key"] = vdom["children"][0]
From 4dfde93fccf4da95fff6ffdf27de5b9032fbd492 Mon Sep 17 00:00:00 2001
From: Archmonger <16909269+Archmonger@users.noreply.github.com>
Date: Thu, 4 Aug 2022 03:57:07 -0700
Subject: [PATCH 05/53] fix tests
---
tests/test_utils.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/tests/test_utils.py b/tests/test_utils.py
index cca97a0ac..49f9c43d4 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -61,7 +61,7 @@ def test_ref_repr():
)
def test_html_to_vdom(case):
assert html_to_vdom(case["source"]) == {
- "tagName": "div",
+ "tagName": "",
"children": [case["model"]],
}
@@ -93,6 +93,6 @@ def make_links_blue(node):
}
assert html_to_vdom(source, make_links_blue) == {
- "tagName": "div",
+ "tagName": "",
"children": [expected],
}
From e5fdfc34076136631722d0bc96c9c89027d918da Mon Sep 17 00:00:00 2001
From: Archmonger <16909269+Archmonger@users.noreply.github.com>
Date: Thu, 4 Aug 2022 05:54:27 -0700
Subject: [PATCH 06/53] fix more tests
---
src/idom/utils.py | 16 ++++++++++++----
tests/test_utils.py | 3 ++-
2 files changed, 14 insertions(+), 5 deletions(-)
diff --git a/src/idom/utils.py b/src/idom/utils.py
index 99f17fc53..154c112c3 100644
--- a/src/idom/utils.py
+++ b/src/idom/utils.py
@@ -1,3 +1,4 @@
+from itertools import chain
from typing import Any, Callable, Dict, Generic, List, TypeVar, Union
from lxml import etree
@@ -87,11 +88,9 @@ def convert(self, html: Union[str, etree._Element], *transforms: _ModelTransform
f"HtmlToVdom encountered unsupported type {type(html)} from {html}"
)
- # Recursively convert the lxml node to a VDOM dict.
+ # Convert the lxml node to a VDOM dict.
vdom = {"tagName": node.tag}
- node_children = [node.text] if node.text else []
- node_children.extend([self.convert(child) for child in node.iterchildren(None)])
- self._set_key_value(vdom, "children", node_children)
+ self._set_key_value(vdom, "children", self._generate_child_vdom(node))
self._set_key_value(vdom, "attributes", dict(node.items()))
self._vdom_mutations(vdom)
@@ -133,3 +132,12 @@ def _vdom_mutations(vdom: Dict):
raise LookupError("Could not find script's contents!")
if vdom["children"][0]:
vdom["key"] = vdom["children"][0]
+
+ def _generate_child_vdom(self, node: etree._Element) -> list:
+ """Recursively generate a list of VDOM children from an lxml node."""
+ children = [node.text] + list(
+ chain(*([self.convert(child), child.tail] for child in node.iterchildren(None)))
+ )
+
+ # Remove None from the list of children from empty text/tail values
+ return list(filter(None, children))
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 49f9c43d4..ccb7c012c 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -67,7 +67,7 @@ def test_html_to_vdom(case):
def test_html_to_vdom_transform():
- source = "
`` block.
"""
- if not isinstance(html, str):
- raise TypeError("html_to_vdom expects a string!")
-
- return _HtmlToVdom(*transforms).convert(html)
-
-
-class _HtmlToVdom:
- def __init__(self, *transforms: _ModelTransform) -> None:
- self.transforms = transforms
-
- def convert(self, html: Union[str, etree._Element]):
- """Convert html string -> lxml node tree -> VDOM dict."""
- # Keep track of whether this is the root node
- root_node = False
-
- # If the user provided a string, convert it to an lxml.etree node.
- if isinstance(html, str):
- parser = etree.HTMLParser()
- node = fragment_fromstring(html, create_parent=True, parser=parser)
- root_node = True
- elif isinstance(html, etree._Element):
- node = html
- else:
- raise TypeError(
- f"HtmlToVdom encountered unsupported type {type(html)} from {html}"
- )
+ # Keep track of whether this is the root node
+ root_node = False
+
+ # If the user provided a string, convert it to an lxml.etree node.
+ if isinstance(html, str):
+ parser = etree.HTMLParser()
+ node = fragment_fromstring(html, create_parent=True, parser=parser)
+ root_node = True
+ elif isinstance(html, etree._Element):
+ node = html
+ else:
+ raise TypeError(
+ f"HtmlToVdom encountered unsupported type {type(html)} from {html}"
+ )
- # Convert the lxml node to a VDOM dict.
- vdom = {
- "tagName": node.tag,
- "children": self._generate_child_vdom(node),
- "attributes": dict(node.items()),
- "eventHandlers": {},
- "importSource": {},
- "key": "",
- "error": "",
- }
- self._vdom_mutations(vdom)
-
- # Apply any provided transforms.
- for transform in self.transforms:
- vdom = transform(vdom)
-
- # The root node is rendered as a React Fragment
- if root_node:
- vdom["tagName"] = ""
-
- # Get rid of empty VDOM fields
- self._prune_vdom_fields(vdom)
-
- return vdom
-
- @staticmethod
- def _vdom_mutations(vdom: Dict):
- """Performs any necessary mutations on the VDOM attributes to meet VDOM spec
- and/or to make elements properly renderable in React."""
- # Convert style attributes to VDOM spec
- if "style" in vdom["attributes"]:
- style = vdom["attributes"]["style"]
- if isinstance(style, str):
- style_dict = {}
- for k, v in (part.split(":", 1) for part in style.split(";") if part):
- title_case_key = k.title().replace("-", "")
- camel_case_key = title_case_key[:1].lower() + title_case_key[1:]
- style_dict[camel_case_key] = v
- vdom["attributes"]["style"] = style_dict
-
- # Set key attribute for scripts to prevent re-execution during re-renders
- if vdom["tagName"] == "script":
- if not isinstance(vdom["children"][0], str):
- # The script's source should always be the first child
- raise LookupError("Could not find script's contents!")
- if vdom["children"][0]:
- vdom["key"] = vdom["children"][0]
-
- @staticmethod
- def _prune_vdom_fields(vdom: Dict):
- """Remove unneeded fields from VDOM dict."""
- if not len(vdom["children"]):
- del vdom["children"]
- if not len(vdom["attributes"]):
- del vdom["attributes"]
- if not len(vdom["eventHandlers"]):
- del vdom["eventHandlers"]
- if not len(vdom["importSource"]):
- del vdom["importSource"]
- if not vdom["key"]:
- del vdom["key"]
- if not vdom["error"]:
- del vdom["error"]
-
- def _generate_child_vdom(self, node: etree._Element) -> list:
- """Recursively generate a list of VDOM children from an lxml node."""
- # Insert text inbetween VDOM children, if necessary
- children = [node.text] + list(
- chain(
- *(
- [self.convert(child), child.tail]
- for child in node.iterchildren(None)
- )
+ # Convert the lxml node to a VDOM dict.
+ vdom = {
+ "tagName": node.tag,
+ "children": _generate_child_vdom(node, transforms),
+ "attributes": dict(node.items()),
+ "eventHandlers": {},
+ "importSource": {},
+ "key": "",
+ "error": "",
+ }
+ _vdom_mutations(vdom)
+
+ # Apply any provided transforms.
+ for transform in transforms:
+ vdom = transform(vdom)
+
+ # The root node is rendered as a React Fragment
+ if root_node:
+ vdom["tagName"] = ""
+
+ # Get rid of empty VDOM fields
+ _prune_vdom_fields(vdom)
+
+ return vdom
+
+
+def _vdom_mutations(vdom: Dict):
+ """Performs any necessary mutations on the VDOM attributes to meet VDOM spec
+ and/or to make elements properly renderable in React."""
+ # Convert style attributes to VDOM spec
+ if "style" in vdom["attributes"]:
+ style = vdom["attributes"]["style"]
+ if isinstance(style, str):
+ style_dict = {}
+ for k, v in (part.split(":", 1) for part in style.split(";") if part):
+ title_case_key = k.title().replace("-", "")
+ camel_case_key = title_case_key[:1].lower() + title_case_key[1:]
+ style_dict[camel_case_key] = v
+ vdom["attributes"]["style"] = style_dict
+
+ # Set key attribute for scripts to prevent re-execution during re-renders
+ if vdom["tagName"] == "script":
+ if not isinstance(vdom["children"][0], str):
+ # The script's source should always be the first child
+ raise LookupError("Could not find script's contents!")
+ if vdom["children"][0]:
+ vdom["key"] = vdom["children"][0]
+
+
+def _prune_vdom_fields(vdom: Dict):
+ """Remove unneeded fields from VDOM dict."""
+ if not len(vdom["children"]):
+ del vdom["children"]
+ if not len(vdom["attributes"]):
+ del vdom["attributes"]
+ if not len(vdom["eventHandlers"]):
+ del vdom["eventHandlers"]
+ if not len(vdom["importSource"]):
+ del vdom["importSource"]
+ if not vdom["key"]:
+ del vdom["key"]
+ if not vdom["error"]:
+ del vdom["error"]
+
+
+def _generate_child_vdom(
+ node: etree._Element, transforms: Iterable[_ModelTransform]
+) -> List:
+ """Recursively generate a list of VDOM children from an lxml node."""
+ # Insert text inbetween VDOM children, if necessary
+ children = [node.text] + list(
+ chain(
+ *(
+ [html_to_vdom(child, *transforms), child.tail]
+ for child in node.iterchildren(None)
)
)
+ )
- # Remove None from the list of children from empty text/tail values
- return list(filter(None, children))
+ # Remove None from the list of children from empty text/tail values
+ return list(filter(None, children))
From 13c7f8ab875a88e0ff6c593013f4c5f6a43ce491 Mon Sep 17 00:00:00 2001
From: Archmonger <16909269+Archmonger@users.noreply.github.com>
Date: Thu, 4 Aug 2022 16:32:00 -0700
Subject: [PATCH 13/53] better function names
---
src/idom/utils.py | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/src/idom/utils.py b/src/idom/utils.py
index 42b8caf68..e26f1d150 100644
--- a/src/idom/utils.py
+++ b/src/idom/utils.py
@@ -82,14 +82,14 @@ def html_to_vdom(html: Union[str, etree._Element], *transforms: _ModelTransform)
# Convert the lxml node to a VDOM dict.
vdom = {
"tagName": node.tag,
- "children": _generate_child_vdom(node, transforms),
+ "children": _generate_vdom_children(node, transforms),
"attributes": dict(node.items()),
"eventHandlers": {},
"importSource": {},
"key": "",
"error": "",
}
- _vdom_mutations(vdom)
+ _mutate_vdom(vdom)
# Apply any provided transforms.
for transform in transforms:
@@ -105,7 +105,7 @@ def html_to_vdom(html: Union[str, etree._Element], *transforms: _ModelTransform)
return vdom
-def _vdom_mutations(vdom: Dict):
+def _mutate_vdom(vdom: Dict):
"""Performs any necessary mutations on the VDOM attributes to meet VDOM spec
and/or to make elements properly renderable in React."""
# Convert style attributes to VDOM spec
@@ -144,9 +144,9 @@ def _prune_vdom_fields(vdom: Dict):
del vdom["error"]
-def _generate_child_vdom(
+def _generate_vdom_children(
node: etree._Element, transforms: Iterable[_ModelTransform]
-) -> List:
+) -> List[Union[Dict, str]]:
"""Recursively generate a list of VDOM children from an lxml node."""
# Insert text inbetween VDOM children, if necessary
children = [node.text] + list(
From e56523c4783b3d088f57fcceb15482c2573e8999 Mon Sep 17 00:00:00 2001
From: Archmonger <16909269+Archmonger@users.noreply.github.com>
Date: Thu, 4 Aug 2022 22:38:56 -0700
Subject: [PATCH 14/53] perform _generate_vdom_children in a single pass
---
src/idom/utils.py | 12 +++++-------
1 file changed, 5 insertions(+), 7 deletions(-)
diff --git a/src/idom/utils.py b/src/idom/utils.py
index e26f1d150..1aaeb3b25 100644
--- a/src/idom/utils.py
+++ b/src/idom/utils.py
@@ -147,16 +147,14 @@ def _prune_vdom_fields(vdom: Dict):
def _generate_vdom_children(
node: etree._Element, transforms: Iterable[_ModelTransform]
) -> List[Union[Dict, str]]:
- """Recursively generate a list of VDOM children from an lxml node."""
- # Insert text inbetween VDOM children, if necessary
- children = [node.text] + list(
+ """Recursively generate a list of VDOM children from an lxml node.
+ Inserts inner text and/or tail text inbetween VDOM children, if necessary."""
+ return ([node.text] if node.text else []) + list(
chain(
*(
- [html_to_vdom(child, *transforms), child.tail]
+ [html_to_vdom(child, *transforms)]
+ + ([child.tail] if child.tail else [])
for child in node.iterchildren(None)
)
)
)
-
- # Remove None from the list of children from empty text/tail values
- return list(filter(None, children))
From 0924c5f3c96fe0f63074d9c8ec248a48beb62cc5 Mon Sep 17 00:00:00 2001
From: Archmonger <16909269+Archmonger@users.noreply.github.com>
Date: Fri, 5 Aug 2022 01:57:30 -0700
Subject: [PATCH 15/53] add changelog entry
---
docs/source/about/changelog.rst | 2 ++
1 file changed, 2 insertions(+)
diff --git a/docs/source/about/changelog.rst b/docs/source/about/changelog.rst
index a83b0cc29..8a8c5f614 100644
--- a/docs/source/about/changelog.rst
+++ b/docs/source/about/changelog.rst
@@ -25,6 +25,7 @@ Unreleased
**Fixed**
+- :issue:`777` - Fix edge cases where ``html_to_vdom`` can fail to convert HTML
- :issue:`789` - Conditionally rendered components cannot use contexts
- :issue:`773` - Use strict equality check for text, numeric, and binary types in hooks
- :issue:`801` - Accidental mutation of old model causes invalid JSON Patch
@@ -38,6 +39,7 @@ Unreleased
**Added**
- :pull:`123` - ``asgiref`` as a dependency
+- :pull:`795` - ``lxml`` as a dependency
v0.39.0
From f9f169a314a3f93857e935a414e2bdb94d0fd2cc Mon Sep 17 00:00:00 2001
From: Archmonger <16909269+Archmonger@users.noreply.github.com>
Date: Fri, 5 Aug 2022 01:57:45 -0700
Subject: [PATCH 16/53] add null tag test
---
tests/test_utils.py | 18 ++++++++++++++++++
1 file changed, 18 insertions(+)
diff --git a/tests/test_utils.py b/tests/test_utils.py
index ccb7c012c..f7bd10e30 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -97,3 +97,21 @@ def make_links_blue(node):
"tagName": "",
"children": [expected],
}
+
+
+def test_html_to_vdom_with_null_tag():
+ source = "hello
world
Hello World.
' + + expected = { + "attributes": {"style": {"backgroundColor": "green", "color": "red"}}, + "children": ["A red paragraph."], + "tagName": "p", + } + + assert html_to_vdom(source) == { + "tagName": "", + "children": [expected], + } From e5ca858f3d0c37e526103cda661083a660a06fad Mon Sep 17 00:00:00 2001 From: Archmonger <16909269+Archmonger@users.noreply.github.com> Date: Fri, 5 Aug 2022 02:38:32 -0700 Subject: [PATCH 18/53] fix tests --- tests/test_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_utils.py b/tests/test_utils.py index 3ecd5d37c..b0114f149 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -122,7 +122,7 @@ def test_html_to_vdom_with_style_attr(): expected = { "attributes": {"style": {"backgroundColor": "green", "color": "red"}}, - "children": ["A red paragraph."], + "children": ["Hello World."], "tagName": "p", } From 37b2019359177c7996cc532ea67bb254e25e4b11 Mon Sep 17 00:00:00 2001 From: Archmonger <16909269+Archmonger@users.noreply.github.com> Date: Fri, 5 Aug 2022 02:45:37 -0700 Subject: [PATCH 19/53] remove uneeded strip --- src/idom/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/idom/utils.py b/src/idom/utils.py index 7d179e663..73bf937e1 100644 --- a/src/idom/utils.py +++ b/src/idom/utils.py @@ -113,7 +113,7 @@ def _mutate_vdom(vdom: Dict): vdom["attributes"]["style"] = { _hypen_to_camel_case(key.strip()): value.strip() for key, value in ( - part.strip().split(":", 1) + part.split(":", 1) for part in vdom["attributes"]["style"].split(";") if ":" in part ) From 64c6515631fd4aadd2d164008b62f32fc67aa376 Mon Sep 17 00:00:00 2001 From: Archmonger <16909269+Archmonger@users.noreply.github.com> Date: Fri, 5 Aug 2022 02:59:06 -0700 Subject: [PATCH 20/53] root_node position cleanup --- src/idom/utils.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/idom/utils.py b/src/idom/utils.py index 73bf937e1..fa17f4459 100644 --- a/src/idom/utils.py +++ 
b/src/idom/utils.py @@ -64,16 +64,14 @@ def html_to_vdom(html: Union[str, etree._Element], *transforms: _ModelTransform) transform function to add highlighting to a ``
`` block.
"""
- # Keep track of whether this is the root node
- root_node = False
-
# If the user provided a string, convert it to an lxml.etree node.
if isinstance(html, str):
parser = etree.HTMLParser()
node = fragment_fromstring(html, create_parent=True, parser=parser)
- root_node = True
+ root_node = True # Only the root node is a HTML string
elif isinstance(html, etree._Element):
node = html
+ root_node = False
else:
raise TypeError(
f"HtmlToVdom encountered unsupported type {type(html)} from {html}"
From c7bc8eda06fd0ebd80c050995dd0ca90d610c514 Mon Sep 17 00:00:00 2001
From: Archmonger <16909269+Archmonger@users.noreply.github.com>
Date: Fri, 5 Aug 2022 13:18:50 -0700
Subject: [PATCH 21/53] user API only accepts str
---
src/idom/utils.py | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/src/idom/utils.py b/src/idom/utils.py
index fa17f4459..2553f8220 100644
--- a/src/idom/utils.py
+++ b/src/idom/utils.py
@@ -53,7 +53,7 @@ def __repr__(self) -> str:
return f"{type(self).__name__}({current})"
-def html_to_vdom(html: Union[str, etree._Element], *transforms: _ModelTransform):
+def html_to_vdom(html: str, *transforms: _ModelTransform):
"""Transform HTML into a DOM model
Parameters:
source:
@@ -64,6 +64,10 @@ def html_to_vdom(html: Union[str, etree._Element], *transforms: _ModelTransform)
transform function to add highlighting to a ``
`` block.
"""
+ return _html_to_vdom(html, *transforms)
+
+def _html_to_vdom(html: Union[str, etree._Element], *transforms: _ModelTransform):
+ """A recursive function to convert HTML to a VDOM model"""
# If the user provided a string, convert it to an lxml.etree node.
if isinstance(html, str):
parser = etree.HTMLParser()
@@ -150,7 +154,7 @@ def _generate_vdom_children(
return ([node.text] if node.text else []) + list(
chain(
*(
- [html_to_vdom(child, *transforms)]
+ [_html_to_vdom(child, *transforms)]
+ ([child.tail] if child.tail else [])
for child in node.iterchildren(None)
)
From 4d0c03cd0738a4a6310f056de8596ed8c76d9672 Mon Sep 17 00:00:00 2001
From: Archmonger <16909269+Archmonger@users.noreply.github.com>
Date: Fri, 5 Aug 2022 21:15:51 -0700
Subject: [PATCH 22/53] etree_to_vdom
---
src/idom/utils.py | 44 ++++++++++++++++++++++++--------------------
1 file changed, 24 insertions(+), 20 deletions(-)
diff --git a/src/idom/utils.py b/src/idom/utils.py
index 2553f8220..3a7c70cb9 100644
--- a/src/idom/utils.py
+++ b/src/idom/utils.py
@@ -63,23 +63,31 @@ def html_to_vdom(html: str, *transforms: _ModelTransform):
dictionary which will be replaced by ``new``. For example, you could use a
transform function to add highlighting to a ``
`` block.
"""
+ if not isinstance(html, str):
+ raise TypeError(f"Encountered unsupported type {type(html)} from {html}")
- return _html_to_vdom(html, *transforms)
-
-def _html_to_vdom(html: Union[str, etree._Element], *transforms: _ModelTransform):
- """A recursive function to convert HTML to a VDOM model"""
# If the user provided a string, convert it to an lxml.etree node.
- if isinstance(html, str):
- parser = etree.HTMLParser()
- node = fragment_fromstring(html, create_parent=True, parser=parser)
- root_node = True # Only the root node is a HTML string
- elif isinstance(html, etree._Element):
- node = html
- root_node = False
- else:
- raise TypeError(
- f"HtmlToVdom encountered unsupported type {type(html)} from {html}"
- )
+ node = fragment_fromstring(html, create_parent=True, parser=etree.HTMLParser())
+ vdom = etree_to_vdom(node, *transforms)
+
+ # The root node is rendered as a React Fragment, instead of a div
+ vdom["tagName"] = ""
+
+ return vdom
+
+
+def etree_to_vdom(node: etree._Element, *transforms: _ModelTransform):
+ """Recusively transform an lxml etree node into a DOM model
+ Parameters:
+ source:
+ The ``lxml.etree._Element`` node
+ transforms:
+ Functions of the form ``transform(old) -> new`` where ``old`` is a VDOM
+ dictionary which will be replaced by ``new``. For example, you could use a
+ transform function to add highlighting to a ``
`` block.
+ """
+ if not isinstance(node, etree._Element):
+ raise TypeError(f"Encountered unsupported type {type(node)} from {node}")
# Convert the lxml node to a VDOM dict.
vdom = {
@@ -97,10 +105,6 @@ def _html_to_vdom(html: Union[str, etree._Element], *transforms: _ModelTransform
for transform in transforms:
vdom = transform(vdom)
- # The root node is rendered as a React Fragment
- if root_node:
- vdom["tagName"] = ""
-
# Get rid of empty VDOM fields
_prune_vdom_fields(vdom)
@@ -154,7 +158,7 @@ def _generate_vdom_children(
return ([node.text] if node.text else []) + list(
chain(
*(
- [_html_to_vdom(child, *transforms)]
+ [etree_to_vdom(child, *transforms)]
+ ([child.tail] if child.tail else [])
for child in node.iterchildren(None)
)
From 3f7b78e01de9d2f1d4e4eeaacf0ce727063104bb Mon Sep 17 00:00:00 2001
From: Archmonger <16909269+Archmonger@users.noreply.github.com>
Date: Sat, 6 Aug 2022 22:26:38 -0700
Subject: [PATCH 23/53] Try to use existing root node
---
src/idom/utils.py | 28 +++++++++++++++++++++-------
1 file changed, 21 insertions(+), 7 deletions(-)
diff --git a/src/idom/utils.py b/src/idom/utils.py
index 3a7c70cb9..59b9a941b 100644
--- a/src/idom/utils.py
+++ b/src/idom/utils.py
@@ -2,7 +2,7 @@
from typing import Any, Callable, Dict, Generic, Iterable, List, TypeVar, Union
from lxml import etree
-from lxml.html import fragment_fromstring
+from lxml.html import fragments_fromstring
_RefValue = TypeVar("_RefValue")
@@ -66,12 +66,26 @@ def html_to_vdom(html: str, *transforms: _ModelTransform):
if not isinstance(html, str):
raise TypeError(f"Encountered unsupported type {type(html)} from {html}")
- # If the user provided a string, convert it to an lxml.etree node.
- node = fragment_fromstring(html, create_parent=True, parser=etree.HTMLParser())
- vdom = etree_to_vdom(node, *transforms)
-
- # The root node is rendered as a React Fragment, instead of a div
- vdom["tagName"] = ""
+ # If the user provided a string, convert it to a list of lxml.etree nodes
+ nodes: List = fragments_fromstring(
+ html, no_leading_text=True, parser=etree.HTMLParser()
+ )
+ has_root_node = len(nodes) == 1
+
+ # Find or create a root node
+ if has_root_node:
+ root_node = nodes[0]
+ else:
+ root_node = etree.Element("div", None, None)
+ for child in nodes:
+ root_node.append(child)
+
+ # Convert the lxml node to a VDOM dict
+ vdom = etree_to_vdom(root_node, *transforms)
+
+ # Change the artificially created root node to a React Fragment, instead of a div
+ if not has_root_node:
+ vdom["tagName"] = ""
return vdom
From d448102895947920c29c423fd0af404271172e46 Mon Sep 17 00:00:00 2001
From: Archmonger <16909269+Archmonger@users.noreply.github.com>
Date: Sat, 6 Aug 2022 22:40:27 -0700
Subject: [PATCH 24/53] test_html_to_vdom_with_no_parent_node
---
tests/test_utils.py | 36 ++++++++++++++++++++++--------------
1 file changed, 22 insertions(+), 14 deletions(-)
diff --git a/tests/test_utils.py b/tests/test_utils.py
index b0114f149..f2826da3e 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -60,10 +60,7 @@ def test_ref_repr():
],
)
def test_html_to_vdom(case):
- assert html_to_vdom(case["source"]) == {
- "tagName": "",
- "children": [case["model"]],
- }
+ assert html_to_vdom(case["source"]) == case["model"]
def test_html_to_vdom_transform():
@@ -93,10 +90,7 @@ def make_links_blue(node):
],
}
- assert html_to_vdom(source, make_links_blue) == {
- "tagName": "",
- "children": [expected],
- }
+ assert html_to_vdom(source, make_links_blue) == expected
def test_html_to_vdom_with_null_tag():
@@ -111,10 +105,7 @@ def test_html_to_vdom_with_null_tag():
],
}
- assert html_to_vdom(source) == {
- "tagName": "",
- "children": [expected],
- }
+ assert html_to_vdom(source) == expected
def test_html_to_vdom_with_style_attr():
@@ -126,7 +117,24 @@ def test_html_to_vdom_with_style_attr():
"tagName": "p",
}
- assert html_to_vdom(source) == {
+ assert html_to_vdom(source) == expected
+
+
+def test_html_to_vdom_with_no_parent_node():
+ source = "Hello
`` block.
+ recover:
+ If ``True``, try to repair broken HTML. This may result in parsing invalid
+ HTML as plain text.
"""
if not isinstance(html, str):
raise TypeError(f"Encountered unsupported type {type(html)} from {html}")
# If the user provided a string, convert it to a list of lxml.etree nodes
parser = etree.HTMLParser(
- remove_comments=True, remove_pis=True, remove_blank_text=True
+ remove_comments=True,
+ remove_pis=True,
+ remove_blank_text=True,
+ recover=recover,
)
nodes: List = fragments_fromstring(html, no_leading_text=True, parser=parser)
has_root_node = len(nodes) == 1
From e464277cae689b4a615d447064ea6f2d2d6e4e86 Mon Sep 17 00:00:00 2001
From: Archmonger <16909269+Archmonger@users.noreply.github.com>
Date: Mon, 8 Aug 2022 11:34:41 -0700
Subject: [PATCH 28/53] make etree to vdom private
---
src/idom/utils.py | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/src/idom/utils.py b/src/idom/utils.py
index 369e7ff23..d7974629e 100644
--- a/src/idom/utils.py
+++ b/src/idom/utils.py
@@ -88,7 +88,7 @@ def html_to_vdom(html: str, *transforms: _ModelTransform, recover: bool = False)
root_node.append(child)
# Convert the lxml node to a VDOM dict
- vdom = etree_to_vdom(root_node, *transforms)
+ vdom = _etree_to_vdom(root_node, *transforms)
# Change the artificially created root node to a React Fragment, instead of a div
if not has_root_node:
@@ -97,7 +97,7 @@ def html_to_vdom(html: str, *transforms: _ModelTransform, recover: bool = False)
return vdom
-def etree_to_vdom(node: etree._Element, *transforms: _ModelTransform):
+def _etree_to_vdom(node: etree._Element, *transforms: _ModelTransform):
"""Recusively transform an lxml etree node into a DOM model
Parameters:
source:
@@ -179,7 +179,7 @@ def _generate_vdom_children(
return ([node.text] if node.text else []) + list(
chain(
*(
- [etree_to_vdom(child, *transforms)]
+ [_etree_to_vdom(child, *transforms)]
+ ([child.tail] if child.tail else [])
for child in node.iterchildren(None)
)
From 0ddf937d99997833d50d6012ca85fc5be2261583 Mon Sep 17 00:00:00 2001
From: Archmonger <16909269+Archmonger@users.noreply.github.com>
Date: Mon, 8 Aug 2022 11:44:10 -0700
Subject: [PATCH 29/53] Remove recover parameter
---
src/idom/utils.py | 9 +++------
1 file changed, 3 insertions(+), 6 deletions(-)
diff --git a/src/idom/utils.py b/src/idom/utils.py
index d7974629e..7c072dec3 100644
--- a/src/idom/utils.py
+++ b/src/idom/utils.py
@@ -53,7 +53,7 @@ def __repr__(self) -> str:
return f"{type(self).__name__}({current})"
-def html_to_vdom(html: str, *transforms: _ModelTransform, recover: bool = False) -> Dict:
+def html_to_vdom(html: str, *transforms: _ModelTransform) -> Dict:
"""Transform HTML into a DOM model
Parameters:
source:
@@ -62,9 +62,6 @@ def html_to_vdom(html: str, *transforms: _ModelTransform, recover: bool = False)
Functions of the form ``transform(old) -> new`` where ``old`` is a VDOM
dictionary which will be replaced by ``new``. For example, you could use a
transform function to add highlighting to a ``
`` block.
- recover:
- If ``True``, try to repair broken HTML. This may result in parsing invalid
- HTML as plain text.
"""
if not isinstance(html, str):
raise TypeError(f"Encountered unsupported type {type(html)} from {html}")
@@ -74,7 +71,7 @@ def html_to_vdom(html: str, *transforms: _ModelTransform, recover: bool = False)
remove_comments=True,
remove_pis=True,
remove_blank_text=True,
- recover=recover,
+ recover=False,
)
nodes: List = fragments_fromstring(html, no_leading_text=True, parser=parser)
has_root_node = len(nodes) == 1
@@ -97,7 +94,7 @@ def html_to_vdom(html: str, *transforms: _ModelTransform, recover: bool = False)
return vdom
-def _etree_to_vdom(node: etree._Element, *transforms: _ModelTransform):
+def _etree_to_vdom(node: etree._Element, *transforms: _ModelTransform) -> Dict:
"""Recusively transform an lxml etree node into a DOM model
Parameters:
source:
From f70cfc51437e35dee5bafd45e82859384f76deef Mon Sep 17 00:00:00 2001
From: Mark <16909269+Archmonger@users.noreply.github.com>
Date: Tue, 9 Aug 2022 13:50:32 -0700
Subject: [PATCH 30/53] Update src/idom/utils.py
Co-authored-by: Ryan Morshead
`` block.
"""
- if not isinstance(html, str):
+ if not isinstance(html, str): # pragma: no cover
raise TypeError(f"Encountered unsupported type {type(html)} from {html}")
# If the user provided a string, convert it to a list of lxml.etree nodes
@@ -109,7 +109,7 @@ def _etree_to_vdom(node: etree._Element, transforms: Iterable[_ModelTransform])
dictionary which will be replaced by ``new``. For example, you could use a
transform function to add highlighting to a ``
`` block.
"""
- if not isinstance(node, etree._Element):
+ if not isinstance(node, etree._Element): # pragma: no cover
raise TypeError(f"Encountered unsupported type {type(node)} from {node}")
# This will recursively call _etree_to_vdom() on all children
From dd23e88484f0b925a0c48a77c24c3a334a2b8acc Mon Sep 17 00:00:00 2001
From: Mark <16909269+Archmonger@users.noreply.github.com>
Date: Sat, 13 Aug 2022 14:57:55 -0700
Subject: [PATCH 41/53] Update src/idom/utils.py
Co-authored-by: Ryan Morshead
`` block.
"""
if not isinstance(html, str): # pragma: no cover
- raise TypeError(f"Encountered unsupported type {type(html)} from {html}")
+ raise TypeError(f"Expected html to be a string, not {type(html).__name__}")
# If the user provided a string, convert it to a list of lxml.etree nodes
parser = etree.HTMLParser(
@@ -114,7 +114,7 @@ def _etree_to_vdom(
transform function to add highlighting to a ``
`` block.
"""
if not isinstance(node, etree._Element): # pragma: no cover
- raise TypeError(f"Encountered unsupported type {type(node)} from {node}")
+ raise TypeError(f"Expected node to be a etree._Element, not {type(node).__name__}")
# This will recursively call _etree_to_vdom() on all children
children = _generate_vdom_children(node, transforms)
From 4ca1f116e87913fddb61126502c7768d9bf6fecb Mon Sep 17 00:00:00 2001
From: Archmonger <16909269+Archmonger@users.noreply.github.com>
Date: Sat, 13 Aug 2022 17:01:14 -0700
Subject: [PATCH 47/53] Convince type checker that it's safe to mutate
attributes
---
src/idom/utils.py | 3 +++
1 file changed, 3 insertions(+)
diff --git a/src/idom/utils.py b/src/idom/utils.py
index 3b8c289bd..077e0e8da 100644
--- a/src/idom/utils.py
+++ b/src/idom/utils.py
@@ -158,6 +158,9 @@ def _mutate_vdom(vdom: VdomDict):
and "style" in vdom["attributes"]
and isinstance(vdom["attributes"]["style"], str)
):
+ # Convince type checker that it's safe to mutate attributes
+ assert isinstance(vdom["attributes"], dict)
+
# Convert style attribute from str -> dict with camelCase keys
vdom["attributes"]["style"] = {
_hypen_to_camel_case(key.strip()): value.strip()
From cf532f996add93452309203e2a1ea5c9abaa074c Mon Sep 17 00:00:00 2001
From: Ryan Morshead hello
world
`` block.
"""
if not isinstance(node, etree._Element): # pragma: no cover
- raise TypeError(f"Expected node to be a etree._Element, not {type(node).__name__}")
+ raise TypeError(
+ f"Expected node to be a etree._Element, not {type(node).__name__}"
+ )
# This will recursively call _etree_to_vdom() on all children
children = _generate_vdom_children(node, transforms)
From 1a23a01b09e8f62195176d27ebca2125f48341c7 Mon Sep 17 00:00:00 2001
From: Archmonger <16909269+Archmonger@users.noreply.github.com>
Date: Sat, 13 Aug 2022 19:42:17 -0700
Subject: [PATCH 50/53] Add strict parameter
---
src/idom/utils.py | 31 ++++++++++++++++++++++++++-----
tests/test_utils.py | 7 +++++--
2 files changed, 31 insertions(+), 7 deletions(-)
diff --git a/src/idom/utils.py b/src/idom/utils.py
index 9ba236ddd..a3057324e 100644
--- a/src/idom/utils.py
+++ b/src/idom/utils.py
@@ -1,11 +1,11 @@
from itertools import chain
-from typing import Any, Callable, Dict, Generic, Iterable, List, TypeVar, Union
+from typing import Any, Callable, Generic, Iterable, List, TypeVar, Union
from lxml import etree
from lxml.html import fragments_fromstring
import idom
-from idom.core.vdom import VdomDict
+from idom.core.types import VdomDict
_RefValue = TypeVar("_RefValue")
@@ -56,7 +56,7 @@ def __repr__(self) -> str:
return f"{type(self).__name__}({current})"
-def html_to_vdom(html: str, *transforms: _ModelTransform) -> VdomDict:
+def html_to_vdom(html: str, *transforms: _ModelTransform, strict=True) -> VdomDict:
"""Transform HTML into a DOM model. Unique keys can be provided to HTML elements
using a ``key=...`` attribute within your HTML tag.
@@ -67,15 +67,32 @@ def html_to_vdom(html: str, *transforms: _ModelTransform) -> VdomDict:
Functions of the form ``transform(old) -> new`` where ``old`` is a VDOM
dictionary which will be replaced by ``new``. For example, you could use a
transform function to add highlighting to a ``
`` block.
+ strict:
+ If ``True``, raise an exception if the HTML does not perfectly follow HTML5
+ syntax.
"""
if not isinstance(html, str): # pragma: no cover
raise TypeError(f"Expected html to be a string, not {type(html).__name__}")
# If the user provided a string, convert it to a list of lxml.etree nodes
parser = etree.HTMLParser(
- remove_comments=True, remove_pis=True, remove_blank_text=True
+ remove_comments=True,
+ remove_pis=True,
+ remove_blank_text=True,
+ recover=not strict,
)
- nodes: List = fragments_fromstring(html, no_leading_text=True, parser=parser)
+ try:
+ nodes: List = fragments_fromstring(html, no_leading_text=True, parser=parser)
+ except etree.XMLSyntaxError as e:
+ if not strict:
+ raise e
+ raise HTMLParseError(
+ "An error has occurred while parsing the HTML.\n\n"
+ "This HTML may be malformatted, or may not perfectly adhere to HTML5.\n"
+ "If you believe the exception above was intentional, "
+ "you can disable the strict parameter on html_to_vdom().\n"
+ "Otherwise, repair your broken HTML and try again."
+ ) from e
has_root_node = len(nodes) == 1
# Find or create a root node
@@ -197,3 +214,7 @@ def _hypen_to_camel_case(string: str) -> str:
"""Convert a hypenated string to camelCase."""
first, _, remainder = string.partition("-")
return first.lower() + remainder.title().replace("-", "")
+
+
+class HTMLParseError(etree.LxmlSyntaxError):
+ """Raised when an HTML document cannot be parsed using strict parsing."""
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 671a61908..861fc315d 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -1,7 +1,7 @@
import pytest
import idom
-from idom.utils import html_to_vdom
+from idom.utils import HTMLParseError, html_to_vdom
def test_basic_ref_behavior():
@@ -104,7 +104,10 @@ def test_non_html_tag_behavior():
],
}
- assert html_to_vdom(source) == expected
+ assert html_to_vdom(source, strict=False) == expected
+
+ with pytest.raises(HTMLParseError):
+ html_to_vdom(source, strict=True)
def test_html_to_vdom_with_null_tag():
From 07b3470382cd7d0768444709cbb8fa46df76cb12 Mon Sep 17 00:00:00 2001
From: Ryan Morshead