Skip to content

Commit eae8169

Browse files
committed
Use set instead of list for block level elements
Using a set gives better performance when checking whether a tag is one of the block-level elements. Issue-1507: #1507
1 parent 6347c57 commit eae8169

File tree

5 files changed

+770
-37
lines changed

5 files changed

+770
-37
lines changed

markdown/core.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ class Markdown:
5050
Attributes:
5151
Markdown.tab_length (int): The number of spaces which correspond to a single tab. Default: `4`.
5252
Markdown.ESCAPED_CHARS (list[str]): List of characters which get the backslash escape treatment.
53-
Markdown.block_level_elements (list[str]): List of HTML tags which get treated as block-level elements.
53+
Markdown.block_level_elements (set[str]): Set of HTML tags which get treated as block-level elements.
5454
See [`markdown.util.BLOCK_LEVEL_ELEMENTS`][] for the full list of elements.
5555
Markdown.registeredExtensions (list[Extension]): List of extensions which have called
5656
[`registerExtension`][markdown.Markdown.registerExtension] during setup.
@@ -113,7 +113,12 @@ def __init__(self, **kwargs):
113113
]
114114
""" List of characters which get the backslash escape treatment. """
115115

116-
self.block_level_elements: list[str] = BLOCK_LEVEL_ELEMENTS.copy()
116+
# `BLOCK_LEVEL_ELEMENTS` is actually a hybrid list/set container.
117+
# It supports list methods for backwards compatibility.
118+
# We explicitly lie here, so that users running type checkers will get
119+
# warnings when they use the container as a list. This is a very effective
120+
# way of communicating the change, and deprecating list-like usage.
121+
self.block_level_elements: set[str] = BLOCK_LEVEL_ELEMENTS.copy()
117122

118123
self.registeredExtensions: list[Extension] = []
119124
self.docType = "" # TODO: Maybe delete this. It does not appear to be used anymore.

markdown/extensions/md_in_html.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,8 @@ class HTMLExtractorExtra(HTMLExtractor):
4242

4343
def __init__(self, md: Markdown, *args, **kwargs):
4444
# All block-level tags.
45-
self.block_level_tags = set(md.block_level_elements.copy())
45+
# TODO: Use `md.block_level_elements.copy()` when it becomes a regular set.
46+
self.block_level_tags = set(md.block_level_elements)
4647
# Block-level tags in which the content only gets span level parsing
4748
self.span_tags = set(
4849
['address', 'dd', 'dt', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'legend', 'li', 'p', 'summary', 'td', 'th']

markdown/util.py

Lines changed: 278 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
import warnings
3030
from functools import wraps, lru_cache
3131
from itertools import count
32-
from typing import TYPE_CHECKING, Generic, Iterator, NamedTuple, TypeVar, TypedDict, overload
32+
from typing import TYPE_CHECKING, Callable, Generic, Iterator, NamedTuple, TypeVar, TypedDict, overload
3333

3434
if TYPE_CHECKING: # pragma: no cover
3535
from markdown import Markdown
@@ -38,13 +38,283 @@
3838
_T = TypeVar('_T')
3939

4040

41-
"""
42-
Constants you might want to modify
43-
-----------------------------------------------------------------------------
44-
"""
41+
def deprecated(message: str, stacklevel: int = 2):
    """
    Issue a [`DeprecationWarning`][] each time the wrapped function/method is called.

    The wrapped callable is still executed and its result returned; only a warning is emitted.

    Arguments:
        message: Text appended to the warning after `'<name>' is deprecated. `.
        stacklevel: Forwarded to `warnings.warn`. The default of `2` attributes the warning
            to the code calling the deprecated callable rather than to this helper.

    Returns:
        A decorator which wraps a function/method so that calling it emits the warning.

    Usage:

    ```python
    @deprecated("This method will be removed in version X; use Y instead.")
    def some_method():
        pass
    ```
    """
    def wrapper(func):
        # `wraps` keeps the name and docstring of the deprecated callable intact.
        @wraps(func)
        def deprecated_func(*args, **kwargs):
            warnings.warn(
                f"'{func.__name__}' is deprecated. {message}",
                category=DeprecationWarning,
                stacklevel=stacklevel
            )
            return func(*args, **kwargs)
        return deprecated_func
    return wrapper
64+
65+
66+
# TODO: Raise errors from list methods in the future.
67+
# Later, remove this class entirely and use a regular set.
68+
class _BlockLevelElements(list):
    # This hybrid list/set container exists for backwards compatibility reasons,
    # to support using both the `BLOCK_LEVEL_ELEMENTS` global variable (soft-deprecated)
    # and the `Markdown.block_level_elements` instance attribute (preferred) as a list or a set.
    # When we stop supporting list methods on these objects, we can remove the container
    # as well as the `test_block_level_elements` test module.
    #
    # Implementation notes:
    #
    # - The data lives in `self._list` (ordered, may hold duplicates when used as a list)
    #   and `self._set` (used for membership tests). Every mutating method keeps
    #   `set(self._list) == self._set`.
    # - The storage of the `list` base class is never populated; it only exists so that
    #   `isinstance(obj, list)` keeps working. Anything inherited from `list` that reads
    #   that storage directly (comparison, `repr`) is therefore overridden below.
    # - List-only methods emit a `DeprecationWarning`; set methods and methods common
    #   to both types (`copy`, `clear`, `remove`, argument-less `pop`) do not.

    def __init__(self, elements: list[str], /) -> None:
        # Base list storage is intentionally left empty (see notes above).
        super().__init__()
        # `list(...)` rather than `.copy()` so any iterable (including a set) is accepted.
        self._list = list(elements)
        self._set = set(self._list)

    def _drop_removed(self) -> None:
        # After the set shrank: drop list entries which are no longer in the set, keeping order.
        self._list[:] = [element for element in self._list if element in self._set]

    def _append_added(self) -> None:
        # After the set grew: append the new elements to the list, sorted for determinism.
        self._list.extend(sorted(self._set.difference(self._list)))

    @deprecated("Using block level elements as a list is deprecated, use it as a set instead.")
    def __add__(self, other: list[str], /) -> list[str]:
        # Using `+` means user expects a list back.
        return self._list + other

    def __and__(self, other: set[str], /) -> set[str]:
        # Using `&` means user expects a set back.
        return self._set & other

    def __contains__(self, item: str, /) -> bool:
        return item in self._set

    @deprecated("Using block level elements as a list is deprecated, use it as a set instead.")
    def __delitem__(self, index: int, /) -> None:
        del self._list[index]
        # Rebuild the set from the list: this copes with duplicates and with slice
        # indices, which would otherwise leave the two containers out of sync.
        self._set = set(self._list)

    def __eq__(self, other: object, /) -> bool:
        # The inherited `list.__eq__` would compare the (always empty) base list storage.
        if isinstance(other, _BlockLevelElements):
            # Set semantics take precedence between two hybrid containers.
            return self._set == other._set
        if isinstance(other, (set, frozenset)):
            return self._set == other
        if isinstance(other, list):
            return self._list == other
        return NotImplemented

    @deprecated("Using block level elements as a list is deprecated, use it as a set instead.")
    def __getitem__(self, index: int, /) -> str:
        return self._list[index]

    @deprecated("Using block level elements as a list is deprecated, use it as a set instead.")
    def __iadd__(self, other: list[str], /) -> set[str]:
        # In-place addition should update both list and set.
        # Materialize first so a one-shot iterator feeds both containers.
        added = list(other)
        self._list += added
        self._set.update(added)
        return self  # type: ignore[return-value]

    def __iand__(self, other: set[str], /) -> set[str]:
        # In-place intersection should update both list and set.
        self._set &= other
        # Elements were only removed.
        self._drop_removed()
        return self  # type: ignore[return-value]

    def __ior__(self, other: set[str], /) -> set[str]:
        # In-place union should update both list and set.
        self._set |= other
        # Elements were only added.
        self._append_added()
        return self  # type: ignore[return-value]

    def __iter__(self) -> Iterator[str]:
        return iter(self._list)

    def __len__(self) -> int:
        # Length of the list, for backwards compatibility.
        # If used as a set, both lengths will be the same.
        return len(self._list)

    def __ne__(self, other: object, /) -> bool:
        # `list` defines its own `__ne__`, so it must be overridden alongside `__eq__`.
        result = self.__eq__(other)
        return result if result is NotImplemented else not result

    def __or__(self, value: set[str], /) -> set[str]:
        # Using `|` means user expects a set back.
        return self._set | value

    def __rand__(self, value: set[str], /) -> set[str]:
        # Using `&` means user expects a set back.
        return value & self._set

    def __ror__(self, value: set[str], /) -> set[str]:
        # Using `|` means user expects a set back.
        return value | self._set

    def __rsub__(self, value: set[str], /) -> set[str]:
        # Using `-` means user expects a set back.
        return value - self._set

    def __rxor__(self, value: set[str], /) -> set[str]:
        # Using `^` means user expects a set back.
        return value ^ self._set

    def __sub__(self, value: set[str], /) -> set[str]:
        # Using `-` means user expects a set back.
        return self._set - value

    def __xor__(self, value: set[str], /) -> set[str]:
        # Using `^` means user expects a set back.
        return self._set ^ value

    def __repr__(self) -> str:
        # The inherited `list.__repr__` would always print `[]` (empty base storage).
        return f"{type(self).__name__}({self._list!r})"

    @deprecated("Using block level elements as a list is deprecated, use it as a set instead.")
    def __reversed__(self) -> Iterator[str]:
        return reversed(self._list)

    @deprecated("Using block level elements as a list is deprecated, use it as a set instead.")
    def __setitem__(self, index: int, value: str, /) -> None:
        # In-place item-setting should update both list and set.
        self._list[index] = value
        # Rebuild the set from the list: this copes with duplicates and with slice
        # assignment, which would otherwise leave the two containers out of sync.
        self._set = set(self._list)

    def __str__(self) -> str:
        return str(self._set)

    def add(self, element: str, /) -> None:
        # In-place addition should update both list and set.
        # Adding an element which is already present is a no-op, as for a regular set;
        # appending it again would make `len()` diverge from the size of the set.
        if element not in self._set:
            self._set.add(element)
            self._list.append(element)

    @deprecated("Using block level elements as a list is deprecated, use it as a set instead.")
    def append(self, element: str, /) -> None:
        # In-place addition should update both list and set.
        self._list.append(element)
        self._set.add(element)

    def clear(self) -> None:
        self._list.clear()
        self._set.clear()

    def copy(self) -> _BlockLevelElements:
        # We're not sure yet whether the user wants to use it as a set or list.
        return _BlockLevelElements(self._list)

    @deprecated("Using block level elements as a list is deprecated, use it as a set instead.")
    def count(self, value: str, /) -> int:
        # Count in list, for backwards compatibility.
        # If used as a set, both counts will be the same (1).
        return self._list.count(value)

    def difference(self, *others: set[str]) -> set[str]:
        # User expects a set back.
        return self._set.difference(*others)

    def difference_update(self, *others: set[str]) -> None:
        # In-place difference should update both list and set.
        self._set.difference_update(*others)
        # Elements were only removed.
        self._drop_removed()

    def discard(self, element: str, /) -> None:
        # In-place discard should update both list and set.
        # Like `remove`, set behavior takes precedence: all occurrences are dropped from
        # the list, otherwise a duplicate would survive there while being gone from the set.
        if element in self._set:
            self._set.discard(element)
            self._list[:] = [item for item in self._list if item != element]

    @deprecated("Using block level elements as a list is deprecated, use it as a set instead.")
    def extend(self, elements: list[str], /) -> None:
        # In-place extension should update both list and set.
        # Materialize first so a one-shot iterator feeds both containers.
        added = list(elements)
        self._list.extend(added)
        self._set.update(added)

    @deprecated("Using block level elements as a list is deprecated, use it as a set instead.")
    def index(self, value, start: int = 0, stop: int = sys.maxsize, /):
        return self._list.index(value, start, stop)

    @deprecated("Using block level elements as a list is deprecated, use it as a set instead.")
    def insert(self, index: int, element: str, /) -> None:
        # In-place insertion should update both list and set.
        self._list.insert(index, element)
        self._set.add(element)

    def intersection(self, *others: set[str]) -> set[str]:
        # User expects a set back.
        return self._set.intersection(*others)

    def intersection_update(self, *others: set[str]) -> None:
        # In-place intersection should update both list and set.
        self._set.intersection_update(*others)
        # Elements were only removed.
        self._drop_removed()

    def isdisjoint(self, other: set[str], /) -> bool:
        return self._set.isdisjoint(other)

    def issubset(self, other: set[str], /) -> bool:
        return self._set.issubset(other)

    def issuperset(self, other: set[str], /) -> bool:
        return self._set.issuperset(other)

    def pop(self, index: int | None = None, /) -> str:
        # In-place pop should update both list and set.
        # Without an index this is valid for both sets and lists, so only the
        # indexed (list-only) form warns.
        if index is None:
            index = -1
        else:
            warnings.warn(
                "Using block level elements as a list is deprecated, use it as a set instead.",
                DeprecationWarning,
                # Attribute the warning to the caller, like the `deprecated` decorator does.
                stacklevel=2,
            )
        element = self._list.pop(index)
        # Only remove from set if absent from list.
        if element not in self._list:
            self._set.remove(element)
        return element

    def remove(self, element: str, /) -> None:
        # In-place removal should update both list and set.
        # We give precedence to set behavior: a missing element raises `KeyError`,
        # and all occurrences are removed from the list.
        self._set.remove(element)
        self._list[:] = [item for item in self._list if item != element]

    @deprecated("Using block level elements as a list is deprecated, use it as a set instead.")
    def reverse(self) -> None:
        self._list.reverse()

    @deprecated("Using block level elements as a list is deprecated, use it as a set instead.")
    def sort(self, /, *, key: Callable | None = None, reverse: bool = False) -> None:
        self._list.sort(key=key, reverse=reverse)

    def symmetric_difference(self, other: set[str], /) -> set[str]:
        # User expects a set back.
        return self._set.symmetric_difference(other)

    def symmetric_difference_update(self, other: set[str], /) -> None:
        # In-place symmetric difference should update both list and set.
        self._set.symmetric_difference_update(other)
        # Elements were both removed and added.
        self._drop_removed()
        self._append_added()

    def union(self, *others: set[str]) -> set[str]:
        # User expects a set back.
        return self._set.union(*others)

    def update(self, *others: set[str]) -> None:
        # In-place union should update both list and set.
        self._set.update(*others)
        # Elements were only added.
        self._append_added()
308+
309+
310+
# Constants you might want to modify
311+
# -----------------------------------------------------------------------------
46312

47-
BLOCK_LEVEL_ELEMENTS: list[str] = [
313+
# Type it as `set[str]` to express our intent for it to be used as such.
314+
# We explicitly lie here, so that users running type checkers will get
315+
# warnings when they use the container as a list. This is a very effective
316+
# way of communicating the change, and deprecating list-like usage.
317+
BLOCK_LEVEL_ELEMENTS: set[str] = _BlockLevelElements([
48318
# Elements which are invalid to wrap in a `<p>` tag.
49319
# See https://w3c.github.io/html/grouping-content.html#the-p-element
50320
'address', 'article', 'aside', 'blockquote', 'details', 'div', 'dl',
@@ -56,9 +326,9 @@
56326
'math', 'map', 'noscript', 'output', 'object', 'option', 'progress', 'script',
57327
'style', 'summary', 'tbody', 'td', 'textarea', 'tfoot', 'th', 'thead', 'tr', 'video',
58328
'center'
59-
]
329+
]) # type: ignore[assignment]
60330
"""
61-
List of HTML tags which get treated as block-level elements. Same as the `block_level_elements`
331+
Set of HTML tags which get treated as block-level elements. Same as the `block_level_elements`
62332
attribute of the [`Markdown`][markdown.Markdown] class. Generally one should use the
63333
attribute on the class. This remains for compatibility with older extensions.
64334
"""
@@ -111,31 +381,6 @@ def get_installed_extensions():
111381
return metadata.entry_points(group='markdown.extensions')
112382

113383

114-
def deprecated(message: str, stacklevel: int = 2):
115-
"""
116-
Raise a [`DeprecationWarning`][] when wrapped function/method is called.
117-
118-
Usage:
119-
120-
```python
121-
@deprecated("This method will be removed in version X; use Y instead.")
122-
def some_method():
123-
pass
124-
```
125-
"""
126-
def wrapper(func):
127-
@wraps(func)
128-
def deprecated_func(*args, **kwargs):
129-
warnings.warn(
130-
f"'{func.__name__}' is deprecated. {message}",
131-
category=DeprecationWarning,
132-
stacklevel=stacklevel
133-
)
134-
return func(*args, **kwargs)
135-
return deprecated_func
136-
return wrapper
137-
138-
139384
def parseBoolValue(value: str | None, fail_on_errors: bool = True, preserve_none: bool = False) -> bool | None:
140385
"""Parses a string representing a boolean value. If parsing was successful,
141386
returns `True` or `False`. If `preserve_none=True`, returns `True`, `False`,

tests/test_apis.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -920,7 +920,7 @@ class TestBlockAppend(unittest.TestCase):
920920
def testBlockAppend(self):
921921
""" Test that appended escapes are only in the current instance. """
922922
md = markdown.Markdown()
923-
md.block_level_elements.append('test')
923+
md.block_level_elements.add('test')
924924
self.assertEqual('test' in md.block_level_elements, True)
925925
md2 = markdown.Markdown()
926926
self.assertEqual('test' not in md2.block_level_elements, True)

0 commit comments

Comments
 (0)