From eae8169b18f2baafe1ccbdbf147f46af785a56cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timoth=C3=A9e=20Mazzucotelli?= Date: Fri, 21 Feb 2025 16:25:41 +0100 Subject: [PATCH 1/4] Use set instead of list for block level elements Using a set allows for better performances when checking for membership of a tag within block level elements. Issue-1507: https://github.com/Python-Markdown/markdown/issues/1507 --- markdown/core.py | 9 +- markdown/extensions/md_in_html.py | 3 +- markdown/util.py | 311 +++++++++++++++++-- tests/test_apis.py | 2 +- tests/test_block_level_elements.py | 482 +++++++++++++++++++++++++++++ 5 files changed, 770 insertions(+), 37 deletions(-) create mode 100644 tests/test_block_level_elements.py diff --git a/markdown/core.py b/markdown/core.py index 6c7a21be9..675b01e63 100644 --- a/markdown/core.py +++ b/markdown/core.py @@ -50,7 +50,7 @@ class Markdown: Attributes: Markdown.tab_length (int): The number of spaces which correspond to a single tab. Default: `4`. Markdown.ESCAPED_CHARS (list[str]): List of characters which get the backslash escape treatment. - Markdown.block_level_elements (list[str]): List of HTML tags which get treated as block-level elements. + Markdown.block_level_elements (set[str]): Set of HTML tags which get treated as block-level elements. See [`markdown.util.BLOCK_LEVEL_ELEMENTS`][] for the full list of elements. Markdown.registeredExtensions (list[Extension]): List of extensions which have called [`registerExtension`][markdown.Markdown.registerExtension] during setup. @@ -113,7 +113,12 @@ def __init__(self, **kwargs): ] """ List of characters which get the backslash escape treatment. """ - self.block_level_elements: list[str] = BLOCK_LEVEL_ELEMENTS.copy() + # `BLOCK_LEVEL_ELEMENTS` is actually a hybrid list/set container. + # It supports list methods for backwards compatibility. + # We explicitly lie here, so that users running type checkers will get + # warnings when they use the container as a list. 
This is a very effective + # way of communicating the change, and deprecating list-like usage. + self.block_level_elements: set[str] = BLOCK_LEVEL_ELEMENTS.copy() self.registeredExtensions: list[Extension] = [] self.docType = "" # TODO: Maybe delete this. It does not appear to be used anymore. diff --git a/markdown/extensions/md_in_html.py b/markdown/extensions/md_in_html.py index d1fbd7af5..918d98564 100644 --- a/markdown/extensions/md_in_html.py +++ b/markdown/extensions/md_in_html.py @@ -42,7 +42,8 @@ class HTMLExtractorExtra(HTMLExtractor): def __init__(self, md: Markdown, *args, **kwargs): # All block-level tags. - self.block_level_tags = set(md.block_level_elements.copy()) + # TODO: Use `md.block_level_elements.copy()` when it becomes a regular set. + self.block_level_tags = set(md.block_level_elements) # Block-level tags in which the content only gets span level parsing self.span_tags = set( ['address', 'dd', 'dt', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'legend', 'li', 'p', 'summary', 'td', 'th'] diff --git a/markdown/util.py b/markdown/util.py index f547721eb..c05b3a727 100644 --- a/markdown/util.py +++ b/markdown/util.py @@ -29,7 +29,7 @@ import warnings from functools import wraps, lru_cache from itertools import count -from typing import TYPE_CHECKING, Generic, Iterator, NamedTuple, TypeVar, TypedDict, overload +from typing import TYPE_CHECKING, Callable, Generic, Iterator, NamedTuple, TypeVar, TypedDict, overload if TYPE_CHECKING: # pragma: no cover from markdown import Markdown @@ -38,13 +38,283 @@ _T = TypeVar('_T') -""" -Constants you might want to modify ------------------------------------------------------------------------------ -""" +def deprecated(message: str, stacklevel: int = 2): + """ + Raise a [`DeprecationWarning`][] when wrapped function/method is called. 
+ + Usage: + ```python + @deprecated("This method will be removed in version X; use Y instead.") + def some_method(): + pass + ``` + """ + def wrapper(func): + @wraps(func) + def deprecated_func(*args, **kwargs): + warnings.warn( + f"'{func.__name__}' is deprecated. {message}", + category=DeprecationWarning, + stacklevel=stacklevel + ) + return func(*args, **kwargs) + return deprecated_func + return wrapper + + +# TODO: Raise errors from list methods in the future. +# Later, remove this class entirely and use a regular set. +class _BlockLevelElements(list): + # This hybrid list/set container exists for backwards compatibility reasons, + # to support using both the `BLOCK_LEVEL_ELEMENTS` global variable (soft-deprecated) + # and the `Markdown.block_level_elements` instance attribute (preferred) as a list or a set. + # When we stop supporting list methods on these objects, we can remove the container + # as well as the `test_block_level_elements` test module. + + def __init__(self, elements: list[str], /) -> None: + self._list = elements.copy() + self._set = set(self._list) + + @deprecated("Using block level elements as a list is deprecated, use it as a set instead.") + def __add__(self, other: list[str], /) -> list[str]: + # Using `+` means user expects a list back. + return self._list + other + + def __and__(self, other: set[str], /) -> set[str]: + # Using `&` means user expects a set back. + return self._set & other + + def __contains__(self, item: str, /) -> bool: + return item in self._set + + @deprecated("Using block level elements as a list is deprecated, use it as a set instead.") + def __delitem__(self, index: int, /) -> None: + element = self._list[index] + del self._list[index] + # Only remove from set if absent from list. 
+ if element not in self._list: + self._set.remove(element) + + @deprecated("Using block level elements as a list is deprecated, use it as a set instead.") + def __getitem__(self, index: int, /) -> str: + return self._list[index] + + @deprecated("Using block level elements as a list is deprecated, use it as a set instead.") + def __iadd__(self, other: list[str], /) -> set[str]: + # In-place addition should update both list and set. + self._list += other + self._set.update(set(other)) + return self # type: ignore[return-value] + + def __iand__(self, other: set[str], /) -> set[str]: + # In-place intersection should update both list and set. + self._set &= other + # Elements were only removed. + self._list[:] = [element for element in self._list if element in self._set] + return self # type: ignore[return-value] + + def __ior__(self, other: set[str], /) -> set[str]: + # In-place union should update both list and set. + self._set |= other + # Elements were only added. + self._list.extend(element for element in sorted(self._set - set(self._list))) + return self # type: ignore[return-value] + + def __iter__(self) -> Iterator[str]: + return iter(self._list) + + def __len__(self) -> int: + # Length of the list, for backwards compatibility. + # If used as a set, both lengths will be the same. + return len(self._list) + + def __or__(self, value: set[str], /) -> set[str]: + # Using `|` means user expects a set back. + return self._set | value + + def __rand__(self, value: set[str], /) -> set[str]: + # Using `&` means user expects a set back. + return value & self._set + + def __ror__(self, value: set[str], /) -> set[str]: + # Using `|` means user expects a set back. + return value | self._set + + def __rsub__(self, value: set[str], /) -> set[str]: + # Using `-` means user expects a set back. + return value - self._set + + def __rxor__(self, value: set[str], /) -> set[str]: + # Using `^` means user expects a set back. 
+ return value ^ self._set + + def __sub__(self, value: set[str], /) -> set[str]: + # Using `-` means user expects a set back. + return self._set - value + + def __xor__(self, value: set[str], /) -> set[str]: + # Using `^` means user expects a set back. + return self._set ^ value + + @deprecated("Using block level elements as a list is deprecated, use it as a set instead.") + def __reversed__(self) -> Iterator[str]: + return reversed(self._list) + + @deprecated("Using block level elements as a list is deprecated, use it as a set instead.") + def __setitem__(self, index: int, value: str, /) -> None: + # In-place item-setting should update both list and set. + old = self._list[index] + self._list[index] = value + # Only remove from set if absent from list. + if old not in self._list: + self._set.discard(old) + self._set.add(value) + + def __str__(self) -> str: + return str(self._set) + + def add(self, element: str, /) -> None: + # In-place addition should update both list and set. + self._set.add(element) + self._list.append(element) + + @deprecated("Using block level elements as a list is deprecated, use it as a set instead.") + def append(self, element: str, /) -> None: + # In-place addition should update both list and set. + self._list.append(element) + self._set.add(element) + + def clear(self) -> None: + self._list.clear() + self._set.clear() + + def copy(self) -> _BlockLevelElements: + # We're not sure yet whether the user wants to use it as a set or list. + return _BlockLevelElements(self._list) + + @deprecated("Using block level elements as a list is deprecated, use it as a set instead.") + def count(self, value: str, /) -> int: + # Count in list, for backwards compatibility. + # If used as a set, both counts will be the same (1). + return self._list.count(value) + + def difference(self, *others: set[str]) -> set[str]: + # User expects a set back. 
+ return self._set.difference(*others) + + def difference_update(self, *others: set[str]) -> None: + # In-place difference should update both list and set. + self._set.difference_update(*others) + # Elements were only removed. + self._list[:] = [element for element in self._list if element in self._set] + + def discard(self, element: str, /) -> None: + # In-place discard should update both list and set. + self._set.discard(element) + try: + self._list.remove(element) + except ValueError: + pass + + @deprecated("Using block level elements as a list is deprecated, use it as a set instead.") + def extend(self, elements: list[str], /) -> None: + # In-place extension should update both list and set. + self._list.extend(elements) + self._set.update(elements) + + @deprecated("Using block level elements as a list is deprecated, use it as a set instead.") + def index(self, value, start: int = 0, stop: int = sys.maxsize, /): + return self._list.index(value, start, stop) + + @deprecated("Using block level elements as a list is deprecated, use it as a set instead.") + def insert(self, index: int, element: str, /) -> None: + # In-place insertion should update both list and set. + self._list.insert(index, element) + self._set.add(element) + + def intersection(self, *others: set[str]) -> set[str]: + # User expects a set back. + return self._set.intersection(*others) + + def intersection_update(self, *others: set[str]) -> None: + # In-place intersection should update both list and set. + self._set.intersection_update(*others) + # Elements were only removed. 
+ self._list[:] = [element for element in self._list if element in self._set] + + def isdisjoint(self, other: set[str], /) -> bool: + return self._set.isdisjoint(other) + + def issubset(self, other: set[str], /) -> bool: + return self._set.issubset(other) + + def issuperset(self, other: set[str], /) -> bool: + return self._set.issuperset(other) + + def pop(self, index: int | None = None, /) -> str: + # In-place pop should update both list and set. + if index is None: + index = -1 + else: + warnings.warn( + "Using block level elements as a list is deprecated, use it as a set instead.", + DeprecationWarning, + ) + element = self._list.pop(index) + # Only remove from set if absent from list. + if element not in self._list: + self._set.remove(element) + return element + + def remove(self, element: str, /) -> None: + # In-place removal should update both list and set. + # We give precedence to set behavior, so we remove all occurrences from the list. + while True: + try: + self._list.remove(element) + except ValueError: + break + self._set.remove(element) + + @deprecated("Using block level elements as a list is deprecated, use it as a set instead.") + def reverse(self) -> None: + self._list.reverse() + + @deprecated("Using block level elements as a list is deprecated, use it as a set instead.") + def sort(self, /, *, key: Callable | None = None, reverse: bool = False) -> None: + self._list.sort(key=key, reverse=reverse) + + def symmetric_difference(self, other: set[str], /) -> set[str]: + # User expects a set back. + return self._set.symmetric_difference(other) + + def symmetric_difference_update(self, other: set[str], /) -> None: + # In-place symmetric difference should update both list and set. + self._set.symmetric_difference_update(other) + # Elements were both removed and added. 
+ self._list[:] = [element for element in self._list if element in self._set] + self._list.extend(element for element in sorted(self._set - set(self._list))) + + def union(self, *others: set[str]) -> set[str]: + # User expects a set back. + return self._set.union(*others) + + def update(self, *others: set[str]) -> None: + # In-place union should update both list and set. + self._set.update(*others) + # Elements were only added. + self._list.extend(element for element in sorted(self._set - set(self._list))) + + +# Constants you might want to modify +# ----------------------------------------------------------------------------- -BLOCK_LEVEL_ELEMENTS: list[str] = [ +# Type it as `set[str]` to express our intent for it to be used as such. +# We explicitly lie here, so that users running type checkers will get +# warnings when they use the container as a list. This is a very effective +# way of communicating the change, and deprecating list-like usage. +BLOCK_LEVEL_ELEMENTS: set[str] = _BlockLevelElements([ # Elements which are invalid to wrap in a `
<p>
` tag. # See https://w3c.github.io/html/grouping-content.html#the-p-element 'address', 'article', 'aside', 'blockquote', 'details', 'div', 'dl', @@ -56,9 +326,9 @@ 'math', 'map', 'noscript', 'output', 'object', 'option', 'progress', 'script', 'style', 'summary', 'tbody', 'td', 'textarea', 'tfoot', 'th', 'thead', 'tr', 'video', 'center' -] +]) # type: ignore[assignment] """ -List of HTML tags which get treated as block-level elements. Same as the `block_level_elements` +Set of HTML tags which get treated as block-level elements. Same as the `block_level_elements` attribute of the [`Markdown`][markdown.Markdown] class. Generally one should use the attribute on the class. This remains for compatibility with older extensions. """ @@ -111,31 +381,6 @@ def get_installed_extensions(): return metadata.entry_points(group='markdown.extensions') -def deprecated(message: str, stacklevel: int = 2): - """ - Raise a [`DeprecationWarning`][] when wrapped function/method is called. - - Usage: - - ```python - @deprecated("This method will be removed in version X; use Y instead.") - def some_method(): - pass - ``` - """ - def wrapper(func): - @wraps(func) - def deprecated_func(*args, **kwargs): - warnings.warn( - f"'{func.__name__}' is deprecated. {message}", - category=DeprecationWarning, - stacklevel=stacklevel - ) - return func(*args, **kwargs) - return deprecated_func - return wrapper - - def parseBoolValue(value: str | None, fail_on_errors: bool = True, preserve_none: bool = False) -> bool | None: """Parses a string representing a boolean value. If parsing was successful, returns `True` or `False`. If `preserve_none=True`, returns `True`, `False`, diff --git a/tests/test_apis.py b/tests/test_apis.py index 55e2cdb66..efd6a23cb 100644 --- a/tests/test_apis.py +++ b/tests/test_apis.py @@ -920,7 +920,7 @@ class TestBlockAppend(unittest.TestCase): def testBlockAppend(self): """ Test that appended escapes are only in the current instance. 
""" md = markdown.Markdown() - md.block_level_elements.append('test') + md.block_level_elements.add('test') self.assertEqual('test' in md.block_level_elements, True) md2 = markdown.Markdown() self.assertEqual('test' not in md2.block_level_elements, True) diff --git a/tests/test_block_level_elements.py b/tests/test_block_level_elements.py new file mode 100644 index 000000000..d8609c446 --- /dev/null +++ b/tests/test_block_level_elements.py @@ -0,0 +1,482 @@ +""" +Python Markdown + +A Python implementation of John Gruber's Markdown. + +Documentation: https://python-markdown.github.io/ +GitHub: https://github.com/Python-Markdown/markdown/ +PyPI: https://pypi.org/project/Markdown/ + +Started by Manfred Stienstra (http://www.dwerg.net/). +Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org). +Currently maintained by Waylan Limberg (https://github.com/waylan), +Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser). + +Copyright 2007-2023 The Python Markdown Project (v. 1.7 and later) +Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b) +Copyright 2004 Manfred Stienstra (the original version) + +License: BSD (see LICENSE.md for details). + +Tests for block level elements. +=============================== + +Tests specific to the hybrid list/set container for block level elements. + +The hybrid list/set container exists for backwards compatibility reasons, +to support using both the `BLOCK_LEVEL_ELEMENTS` global variable (soft-deprecated) +and the `Markdown.block_level_elements` instance attribute (preferred) as a list or a set. +When we stop supporting list methods on these objects, we can remove the container +as well as this test module. +""" + +import unittest + +from markdown.util import _BlockLevelElements + + +class TestBlockLevelElements(unittest.TestCase): + """ Tests for the block level elements container. 
""" + + def test__init__(self): + ble = _BlockLevelElements([]) + self.assertEqual(ble._list, []) + self.assertEqual(ble._set, set()) + + def test__init__duplicates(self): + ble = _BlockLevelElements(["a", "a", "b"]) + self.assertEqual(ble._list, ["a", "a", "b"]) + self.assertEqual(ble._set, {"a", "b"}) + + def test___add__(self): + ble = _BlockLevelElements(["a", "b"]) + with self.assertWarns(DeprecationWarning): + ble2 = ble + ["c", "d"] + self.assertIsInstance(ble2, list) + self.assertEqual(ble2, ["a", "b", "c", "d"]) + + def test___add__duplicates(self): + ble = _BlockLevelElements(["a", "b"]) + with self.assertWarns(DeprecationWarning): + ble2 = ble + ["a", "b"] + self.assertIsInstance(ble2, list) + self.assertEqual(ble2, ["a", "b", "a", "b"]) + + def test___and__(self): + ble = _BlockLevelElements(["a", "b"]) + ble2 = ble & {"b", "c"} + self.assertIsInstance(ble2, set) + self.assertEqual(ble2, {"b"}) + + def test___contains__(self): + ble = _BlockLevelElements(["a", "b"]) + self.assertIn("a", ble) + self.assertNotIn("c", ble) + + def test___delitem__(self): + ble = _BlockLevelElements(["a", "b", "c"]) + with self.assertWarns(DeprecationWarning): + del ble[0] + self.assertEqual(ble._list, ["b", "c"]) + self.assertEqual(ble._set, {"b", "c"}) + with self.assertWarns(DeprecationWarning): + del ble[1] + self.assertEqual(ble._list, ["b"]) + self.assertEqual(ble._set, {"b"}) + with self.assertWarns(DeprecationWarning): + self.assertRaises(IndexError, ble.__delitem__, 10) + + def test___delitem__duplicates(self): + ble = _BlockLevelElements(["a", "a", "b"]) + with self.assertWarns(DeprecationWarning): + del ble[0] + self.assertEqual(ble._list, ["a", "b"]) + self.assertEqual(ble._set, {"a", "b"}) + + def test___getitem__(self): + ble = _BlockLevelElements(["a", "b", "c"]) + with self.assertWarns(DeprecationWarning): + self.assertEqual(ble[0], "a") + self.assertEqual(ble[1], "b") + self.assertEqual(ble[2], "c") + self.assertRaises(IndexError, ble.__getitem__, 10) + + 
def test___getitem__duplicates(self): + ble = _BlockLevelElements(["a", "a", "b"]) + with self.assertWarns(DeprecationWarning): + self.assertEqual(ble[0], "a") + self.assertEqual(ble[1], "a") + self.assertEqual(ble[2], "b") + + def test___iadd__(self): + ble = _BlockLevelElements(["a", "b"]) + with self.assertWarns(DeprecationWarning): + ble += ["c", "d"] + self.assertEqual(ble._list, ["a", "b", "c", "d"]) + self.assertEqual(ble._set, {"a", "b", "c", "d"}) + + def test___iadd__duplicates(self): + ble = _BlockLevelElements(["a", "b"]) + with self.assertWarns(DeprecationWarning): + ble += ["a", "b"] + self.assertEqual(ble._list, ["a", "b", "a", "b"]) + self.assertEqual(ble._set, {"a", "b"}) + + def test___iand__(self): + ble = _BlockLevelElements(["a", "b"]) + ble &= {"b", "c"} + self.assertEqual(ble._list, ["b"]) + self.assertEqual(ble._set, {"b"}) + + def test___ior__(self): + ble = _BlockLevelElements(["a", "b"]) + ble |= {"b", "c"} + self.assertEqual(ble._list, ["a", "b", "c"]) + self.assertEqual(ble._set, {"a", "b", "c"}) + + def test___iter__(self): + ble = _BlockLevelElements(["a", "b", "c"]) + self.assertEqual(tuple(ble), ("a", "b", "c")) + + def test___iter__duplicates(self): + ble = _BlockLevelElements(["a", "a", "b"]) + self.assertEqual(tuple(ble), ("a", "a", "b")) + + def test___len__(self): + self.assertEqual(len(_BlockLevelElements([])), 0) + self.assertEqual(len(_BlockLevelElements(["a", "b"])), 2) + self.assertEqual(len(_BlockLevelElements(["a", "b", "c"])), 3) + + def test___len__duplicates(self): + self.assertEqual(len(_BlockLevelElements(["a", "a"])), 2) + self.assertEqual(len(_BlockLevelElements(["a", "a", "b"])), 3) + + def test___or__(self): + ble = _BlockLevelElements(["a", "b"]) + ble2 = ble | {"b", "c"} + self.assertIsInstance(ble2, set) + self.assertEqual(ble2, {"a", "b", "c"}) + + def test___rand__(self): + ble = _BlockLevelElements(["a", "b"]) + ble2 = {"b", "c"} & ble + self.assertIsInstance(ble2, set) + self.assertEqual(ble2, {"b"}) + + 
def test___ror__(self): + ble = _BlockLevelElements(["a", "b"]) + ble2 = {"b", "c"} | ble + self.assertIsInstance(ble2, set) + self.assertEqual(ble2, {"a", "b", "c"}) + + def test___rsub__(self): + ble = _BlockLevelElements(["a", "b"]) + ble2 = {"b", "c"} - ble + self.assertIsInstance(ble2, set) + self.assertEqual(ble2, {"c"}) + + def test___rxor__(self): + ble = _BlockLevelElements(["a", "b"]) + ble2 = {"b", "c"} ^ ble + self.assertIsInstance(ble2, set) + self.assertEqual(ble2, {"a", "c"}) + + def test___sub__(self): + ble = _BlockLevelElements(["a", "b"]) + ble2 = ble - {"b", "c"} + self.assertIsInstance(ble2, set) + self.assertEqual(ble2, {"a"}) + + def test___xor__(self): + ble = _BlockLevelElements(["a", "b"]) + ble2 = ble ^ {"b", "c"} + self.assertIsInstance(ble2, set) + self.assertEqual(ble2, {"a", "c"}) + + def test___reversed__(self): + ble = _BlockLevelElements(["a", "b", "c"]) + with self.assertWarns(DeprecationWarning): + self.assertEqual(tuple(reversed(ble)), ("c", "b", "a")) + + def test___reversed__duplicates(self): + ble = _BlockLevelElements(["a", "a", "b"]) + with self.assertWarns(DeprecationWarning): + self.assertEqual(tuple(reversed(ble)), ("b", "a", "a")) + + def test___setitem__(self): + ble = _BlockLevelElements(["a", "b", "c"]) + with self.assertWarns(DeprecationWarning): + ble[0] = "d" + self.assertEqual(ble._list, ["d", "b", "c"]) + self.assertEqual(ble._set, {"d", "b", "c"}) + with self.assertWarns(DeprecationWarning): + ble[1] = "e" + self.assertEqual(ble._list, ["d", "e", "c"]) + self.assertEqual(ble._set, {"d", "e", "c"}) + with self.assertWarns(DeprecationWarning): + self.assertRaises(IndexError, ble.__setitem__, 10, "f") + + def test___setitem__duplicates(self): + ble = _BlockLevelElements(["a", "a", "b"]) + with self.assertWarns(DeprecationWarning): + ble[0] = "b" + self.assertEqual(ble._list, ["b", "a", "b"]) + self.assertEqual(ble._set, {"a", "b"}) + with self.assertWarns(DeprecationWarning): + ble[1] = "b" + 
self.assertEqual(ble._list, ["b", "b", "b"]) + self.assertEqual(ble._set, {"b"}) + + def test___str__(self): + ble = _BlockLevelElements(["a"]) + self.assertEqual(str(ble), "{'a'}") + + def test_add(self): + ble = _BlockLevelElements(["a", "b"]) + ble.add("c") + self.assertEqual(ble._list, ["a", "b", "c"]) + self.assertEqual(ble._set, {"a", "b", "c"}) + + def test_add_duplicates(self): + ble = _BlockLevelElements(["a", "b"]) + ble.add("a") + self.assertEqual(ble._list, ["a", "b", "a"]) + self.assertEqual(ble._set, {"a", "b"}) + + def test_append(self): + ble = _BlockLevelElements(["a", "b"]) + with self.assertWarns(DeprecationWarning): + ble.append("c") + self.assertEqual(ble._list, ["a", "b", "c"]) + self.assertEqual(ble._set, {"a", "b", "c"}) + + def test_append_duplicates(self): + ble = _BlockLevelElements(["a", "b"]) + with self.assertWarns(DeprecationWarning): + ble.append("a") + self.assertEqual(ble._list, ["a", "b", "a"]) + self.assertEqual(ble._set, {"a", "b"}) + + def test_clear(self): + ble = _BlockLevelElements(["a", "b"]) + ble.clear() + self.assertEqual(ble._list, []) + self.assertEqual(ble._set, set()) + + def test_copy(self): + ble = _BlockLevelElements(["a", "b"]) + ble2 = ble.copy() + self.assertIsNot(ble2, ble) + self.assertEqual(ble2._list, ["a", "b"]) + self.assertEqual(ble2._set, {"a", "b"}) + + def test_copy_duplicates(self): + ble = _BlockLevelElements(["a", "a"]) + ble2 = ble.copy() + self.assertIsNot(ble2, ble) + self.assertEqual(ble2._list, ["a", "a"]) + self.assertEqual(ble2._set, {"a"}) + + def test_count(self): + ble = _BlockLevelElements(["a", "b"]) + with self.assertWarns(DeprecationWarning): + self.assertEqual(ble.count("a"), 1) + self.assertEqual(ble.count("b"), 1) + self.assertEqual(ble.count("c"), 0) + + def test_count_duplicates(self): + ble = _BlockLevelElements(["a", "a"]) + with self.assertWarns(DeprecationWarning): + self.assertEqual(ble.count("a"), 2) + + def test_difference(self): + ble = _BlockLevelElements(["a", "b"]) + 
ble2 = ble.difference({"b", "c"}) + self.assertIsInstance(ble2, set) + self.assertEqual(ble2, {"a"}) + + def test_difference_update(self): + ble = _BlockLevelElements(["a", "b"]) + ble.difference_update({"b", "c"}) + self.assertEqual(ble._list, ["a"]) + self.assertEqual(ble._set, {"a"}) + + def test_discard(self): + ble = _BlockLevelElements(["a", "b"]) + ble.discard("b") + ble.discard("b") # Assert no error. + self.assertEqual(ble._list, ["a"]) + self.assertEqual(ble._set, {"a"}) + + def test_extend(self): + ble = _BlockLevelElements(["a", "b"]) + with self.assertWarns(DeprecationWarning): + ble.extend(["c", "d"]) + self.assertEqual(ble._list, ["a", "b", "c", "d"]) + self.assertEqual(ble._set, {"a", "b", "c", "d"}) + + def test_extend_duplicates(self): + ble = _BlockLevelElements(["a", "b"]) + with self.assertWarns(DeprecationWarning): + ble.extend(["a", "b"]) + self.assertEqual(ble._list, ["a", "b", "a", "b"]) + self.assertEqual(ble._set, {"a", "b"}) + + def test_index(self): + ble = _BlockLevelElements(["a", "b", "c"]) + with self.assertWarns(DeprecationWarning): + self.assertEqual(ble.index("a"), 0) + with self.assertWarns(DeprecationWarning): + self.assertEqual(ble.index("b"), 1) + with self.assertWarns(DeprecationWarning): + self.assertEqual(ble.index("c"), 2) + with self.assertWarns(DeprecationWarning): + self.assertRaises(ValueError, ble.index, "d") + + def test_index_duplicates(self): + ble = _BlockLevelElements(["a", "b", "b"]) + with self.assertWarns(DeprecationWarning): + self.assertEqual(ble.index("b"), 1) + with self.assertWarns(DeprecationWarning): + self.assertEqual(ble.index("b", 2), 2) + + def test_insert(self): + ble = _BlockLevelElements(["a", "b", "c"]) + with self.assertWarns(DeprecationWarning): + ble.insert(1, "d") + self.assertEqual(ble._list, ["a", "d", "b", "c"]) + self.assertEqual(ble._set, {"a", "b", "c", "d"}) + with self.assertWarns(DeprecationWarning): + ble.insert(100, "e") + self.assertIn("e", ble) + + def 
test_insert_duplicates(self): + ble = _BlockLevelElements(["a", "a", "b"]) + with self.assertWarns(DeprecationWarning): + ble.insert(1, "b") + self.assertEqual(ble._list, ["a", "b", "a", "b"]) + self.assertEqual(ble._set, {"a", "b"}) + + def test_intersection(self): + ble = _BlockLevelElements(["a", "b"]) + ble2 = ble.intersection({"b", "c"}) + self.assertIsInstance(ble2, set) + self.assertEqual(ble2, {"b"}) + + def test_intersection_update(self): + ble = _BlockLevelElements(["a", "b"]) + ble.intersection_update({"b", "c"}) + self.assertEqual(ble._list, ["b"]) + self.assertEqual(ble._set, {"b"}) + + def test_isdisjoint(self): + ble = _BlockLevelElements(["a", "b"]) + self.assertFalse(ble.isdisjoint({"b", "c"})) + self.assertTrue(ble.isdisjoint({"c", "d"})) + + def test_issubset(self): + ble = _BlockLevelElements(["a", "b"]) + self.assertTrue(ble.issubset({"a", "b", "c"})) + self.assertFalse(ble.issubset({"a", "c"})) + + def test_issuperset(self): + ble = _BlockLevelElements(["a", "b"]) + self.assertTrue(ble.issuperset({"a"})) + self.assertTrue(ble.issuperset({"a", "b"})) + self.assertFalse(ble.issuperset({"a", "c"})) + + def test_pop(self): + ble = _BlockLevelElements(["a", "b", "c"]) + self.assertEqual(ble.pop(), "c") + self.assertEqual(ble._list, ["a", "b"]) + self.assertEqual(ble._set, {"a", "b"}) + with self.assertWarns(DeprecationWarning): + self.assertEqual(ble.pop(0), "a") + self.assertEqual(ble._list, ["b"]) + self.assertEqual(ble._set, {"b"}) + with self.assertWarns(DeprecationWarning): + self.assertRaises(IndexError, ble.pop, 10) + + def test_pop_duplicates(self): + ble = _BlockLevelElements(["a", "a", "b", "b"]) + self.assertEqual(ble.pop(), "b") + self.assertEqual(ble._list, ["a", "a", "b"]) + self.assertEqual(ble._set, {"a", "b"}) + with self.assertWarns(DeprecationWarning): + self.assertEqual(ble.pop(0), "a") + self.assertEqual(ble._list, ["a", "b"]) + self.assertEqual(ble._set, {"a", "b"}) + self.assertEqual(ble.pop(), "b") + 
self.assertEqual(ble._list, ["a"]) + self.assertEqual(ble._set, {"a"}) + + def test_remove(self): + ble = _BlockLevelElements(["a", "b", "c"]) + ble.remove("b") + self.assertEqual(ble._list, ["a", "c"]) + self.assertEqual(ble._set, {"a", "c"}) + self.assertRaises(KeyError, ble.remove, "d") + + def test_remove_duplicates(self): + ble = _BlockLevelElements(["a", "a", "b"]) + ble.remove("a") + self.assertEqual(ble._list, ["b"]) + self.assertEqual(ble._set, {"b"}) + + def test_reverse(self): + ble = _BlockLevelElements(["a", "b", "c"]) + with self.assertWarns(DeprecationWarning): + ble.reverse() + self.assertEqual(ble._list, ["c", "b", "a"]) + self.assertEqual(ble._set, {"a", "b", "c"}) + + def test_reverse_duplicates(self): + ble = _BlockLevelElements(["a", "a", "b"]) + with self.assertWarns(DeprecationWarning): + ble.reverse() + self.assertEqual(ble._list, ["b", "a", "a"]) + self.assertEqual(ble._set, {"a", "b"}) + + def test_sort(self): + ble = _BlockLevelElements(["c", "a", "b"]) + with self.assertWarns(DeprecationWarning): + ble.sort() + self.assertEqual(ble._list, ["a", "b", "c"]) + self.assertEqual(ble._set, {"a", "b", "c"}) + + def test_sort_duplicates(self): + ble = _BlockLevelElements(["b", "a", "b"]) + with self.assertWarns(DeprecationWarning): + ble.sort() + self.assertEqual(ble._list, ["a", "b", "b"]) + self.assertEqual(ble._set, {"a", "b"}) + + def test_symmetric_difference(self): + ble = _BlockLevelElements(["a", "b"]) + ble2 = ble.symmetric_difference({"b", "c"}) + self.assertIsInstance(ble2, set) + self.assertEqual(ble2, {"a", "c"}) + + def test_symmetric_difference_update(self): + ble = _BlockLevelElements(["a", "b"]) + ble.symmetric_difference_update({"b", "c"}) + self.assertEqual(ble._list, ["a", "c"]) + self.assertEqual(ble._set, {"a", "c"}) + + def test_union(self): + ble = _BlockLevelElements(["a", "b"]) + ble2 = ble.union({"b", "c"}) + self.assertIsInstance(ble2, set) + self.assertEqual(ble2, {"a", "b", "c"}) + + def test_update(self): + ble = 
_BlockLevelElements(["a", "b"]) + ble.update({"b", "c"}) + self.assertEqual(ble._list, ["a", "b", "c"]) + self.assertEqual(ble._set, {"a", "b", "c"}) + + # Special tests + def test_isinstance(self): + ble = _BlockLevelElements([]) + self.assertIsInstance(ble, _BlockLevelElements) + self.assertIsInstance(ble, list) From 84e3b2ac357bf45c21a1e1cd9a1b3f090156fed3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timoth=C3=A9e=20Mazzucotelli?= Date: Fri, 21 Feb 2025 16:33:41 +0100 Subject: [PATCH 2/4] Don't precompute placeholder replacements in raw HTML post-processor Previously, the raw HTML post-processor would precompute all possible replacements for placeholders in a string, based on the HTML stash. It would then apply a regular expression substitution using these replacements. Finally, if the text changed, it would recurse, and do all that again. This was inefficient because placeholders were re-computed each time it recursed, and because only a few replacements would be used anyway. This change moves the recursion into the regular expression substitution, so that: 1. the regular expression does minimal work on the text (contrary to re-scanning text already scanned in previous frames); 2. but more importantly, replacements aren't computed ahead of time anymore (and even less *several times*), and only fetched from the HTML stash as placeholders are found in the text. The substitution function relies on the regular expression groups ordering: we make sure to match `

<p>PLACEHOLDER</p>

` first, before `PLACEHOLDER`. The presence of a wrapping `p` tag indicates whether to wrap again the substitution result, or not (also depending on whether the substituted HTML is a block-level tag). Issue-1507: https://github.com/Python-Markdown/markdown/issues/1507 --- markdown/postprocessors.py | 40 +++++++++++++------------------------- 1 file changed, 14 insertions(+), 26 deletions(-) diff --git a/markdown/postprocessors.py b/markdown/postprocessors.py index 7f5ede90c..d4b0e1fdc 100644 --- a/markdown/postprocessors.py +++ b/markdown/postprocessors.py @@ -28,7 +28,6 @@ from __future__ import annotations -from collections import OrderedDict from typing import TYPE_CHECKING, Any from . import util import re @@ -73,37 +72,26 @@ class RawHtmlPostprocessor(Postprocessor): def run(self, text: str) -> str: """ Iterate over html stash and restore html. """ - replacements = OrderedDict() - for i in range(self.md.htmlStash.html_counter): - html = self.stash_to_string(self.md.htmlStash.rawHtmlBlocks[i]) - if self.isblocklevel(html): - replacements["

<p>{}</p>

".format( - self.md.htmlStash.get_placeholder(i))] = html - replacements[self.md.htmlStash.get_placeholder(i)] = html - def substitute_match(m: re.Match[str]) -> str: - key = m.group(0) - - if key not in replacements: - if key[3:-4] in replacements: - return f'

<p>{ replacements[key[3:-4]] }</p>

' - else: - return key - - return replacements[key] - - if replacements: + if key := m.group(1): + wrapped = True + else: + key = m.group(2) + wrapped = False + if (key := int(key)) >= self.md.htmlStash.html_counter: + return m.group(0) + html = self.stash_to_string(self.md.htmlStash.rawHtmlBlocks[key]) + if not wrapped or self.isblocklevel(html): + return pattern.sub(substitute_match, html) + return pattern.sub(substitute_match, f"

<p>{html}</p>

") + + if self.md.htmlStash.html_counter: base_placeholder = util.HTML_PLACEHOLDER % r'([0-9]+)' pattern = re.compile(f'

<p>{ base_placeholder }</p>

|{ base_placeholder }') - processed_text = pattern.sub(substitute_match, text) + return pattern.sub(substitute_match, text) else: return text - if processed_text == text: - return processed_text - else: - return self.run(processed_text) - def isblocklevel(self, html: str) -> bool: """ Check is block of HTML is block-level. """ m = self.BLOCK_LEVEL_REGEX.match(html) From b1fad82f5aa29fd0300505eb202cd98952af7fda Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timoth=C3=A9e=20Mazzucotelli?= Date: Fri, 21 Feb 2025 17:00:19 +0100 Subject: [PATCH 3/4] Add changelog entry for improved raw HTML post-processor perfs --- docs/changelog.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/changelog.md b/docs/changelog.md index 34817bddf..17829a618 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -15,6 +15,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 * DRY fix in `abbr` extension by introducing method `create_element` (#1483). * Clean up test directory some removing some redundant tests and port non-redundant cases to the newer test framework. +* Improved performance of the raw HTML post-processor (#1510). ### Fixed From e6b086b4099dbdc03fef944722b04fb338882d34 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timoth=C3=A9e=20Mazzucotelli?= Date: Wed, 26 Mar 2025 13:50:10 +0100 Subject: [PATCH 4/4] Revert "Use set instead of list for block level elements" This reverts commit eae8169b18f2baafe1ccbdbf147f46af785a56cb. 
--- markdown/core.py | 9 +- markdown/extensions/md_in_html.py | 3 +- markdown/util.py | 311 ++----------------- tests/test_apis.py | 2 +- tests/test_block_level_elements.py | 482 ----------------------------- 5 files changed, 37 insertions(+), 770 deletions(-) delete mode 100644 tests/test_block_level_elements.py diff --git a/markdown/core.py b/markdown/core.py index 675b01e63..6c7a21be9 100644 --- a/markdown/core.py +++ b/markdown/core.py @@ -50,7 +50,7 @@ class Markdown: Attributes: Markdown.tab_length (int): The number of spaces which correspond to a single tab. Default: `4`. Markdown.ESCAPED_CHARS (list[str]): List of characters which get the backslash escape treatment. - Markdown.block_level_elements (set[str]): Set of HTML tags which get treated as block-level elements. + Markdown.block_level_elements (list[str]): List of HTML tags which get treated as block-level elements. See [`markdown.util.BLOCK_LEVEL_ELEMENTS`][] for the full list of elements. Markdown.registeredExtensions (list[Extension]): List of extensions which have called [`registerExtension`][markdown.Markdown.registerExtension] during setup. @@ -113,12 +113,7 @@ def __init__(self, **kwargs): ] """ List of characters which get the backslash escape treatment. """ - # `BLOCK_LEVEL_ELEMENTS` is actually a hybrid list/set container. - # It supports list methods for backwards compatibility. - # We explicitly lie here, so that users running type checkers will get - # warnings when they use the container as a list. This is a very effective - # way of communicating the change, and deprecating list-like usage. - self.block_level_elements: set[str] = BLOCK_LEVEL_ELEMENTS.copy() + self.block_level_elements: list[str] = BLOCK_LEVEL_ELEMENTS.copy() self.registeredExtensions: list[Extension] = [] self.docType = "" # TODO: Maybe delete this. It does not appear to be used anymore. 
diff --git a/markdown/extensions/md_in_html.py b/markdown/extensions/md_in_html.py index 918d98564..d1fbd7af5 100644 --- a/markdown/extensions/md_in_html.py +++ b/markdown/extensions/md_in_html.py @@ -42,8 +42,7 @@ class HTMLExtractorExtra(HTMLExtractor): def __init__(self, md: Markdown, *args, **kwargs): # All block-level tags. - # TODO: Use `md.block_level_elements.copy()` when it becomes a regular set. - self.block_level_tags = set(md.block_level_elements) + self.block_level_tags = set(md.block_level_elements.copy()) # Block-level tags in which the content only gets span level parsing self.span_tags = set( ['address', 'dd', 'dt', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'legend', 'li', 'p', 'summary', 'td', 'th'] diff --git a/markdown/util.py b/markdown/util.py index c05b3a727..f547721eb 100644 --- a/markdown/util.py +++ b/markdown/util.py @@ -29,7 +29,7 @@ import warnings from functools import wraps, lru_cache from itertools import count -from typing import TYPE_CHECKING, Callable, Generic, Iterator, NamedTuple, TypeVar, TypedDict, overload +from typing import TYPE_CHECKING, Generic, Iterator, NamedTuple, TypeVar, TypedDict, overload if TYPE_CHECKING: # pragma: no cover from markdown import Markdown @@ -38,283 +38,13 @@ _T = TypeVar('_T') -def deprecated(message: str, stacklevel: int = 2): - """ - Raise a [`DeprecationWarning`][] when wrapped function/method is called. - - Usage: - - ```python - @deprecated("This method will be removed in version X; use Y instead.") - def some_method(): - pass - ``` - """ - def wrapper(func): - @wraps(func) - def deprecated_func(*args, **kwargs): - warnings.warn( - f"'{func.__name__}' is deprecated. {message}", - category=DeprecationWarning, - stacklevel=stacklevel - ) - return func(*args, **kwargs) - return deprecated_func - return wrapper - - -# TODO: Raise errors from list methods in the future. -# Later, remove this class entirely and use a regular set. 
-class _BlockLevelElements(list): - # This hybrid list/set container exists for backwards compatibility reasons, - # to support using both the `BLOCK_LEVEL_ELEMENTS` global variable (soft-deprecated) - # and the `Markdown.block_level_elements` instance attribute (preferred) as a list or a set. - # When we stop supporting list methods on these objects, we can remove the container - # as well as the `test_block_level_elements` test module. - - def __init__(self, elements: list[str], /) -> None: - self._list = elements.copy() - self._set = set(self._list) - - @deprecated("Using block level elements as a list is deprecated, use it as a set instead.") - def __add__(self, other: list[str], /) -> list[str]: - # Using `+` means user expects a list back. - return self._list + other - - def __and__(self, other: set[str], /) -> set[str]: - # Using `&` means user expects a set back. - return self._set & other - - def __contains__(self, item: str, /) -> bool: - return item in self._set - - @deprecated("Using block level elements as a list is deprecated, use it as a set instead.") - def __delitem__(self, index: int, /) -> None: - element = self._list[index] - del self._list[index] - # Only remove from set if absent from list. - if element not in self._list: - self._set.remove(element) - - @deprecated("Using block level elements as a list is deprecated, use it as a set instead.") - def __getitem__(self, index: int, /) -> str: - return self._list[index] - - @deprecated("Using block level elements as a list is deprecated, use it as a set instead.") - def __iadd__(self, other: list[str], /) -> set[str]: - # In-place addition should update both list and set. - self._list += other - self._set.update(set(other)) - return self # type: ignore[return-value] - - def __iand__(self, other: set[str], /) -> set[str]: - # In-place intersection should update both list and set. - self._set &= other - # Elements were only removed. 
- self._list[:] = [element for element in self._list if element in self._set] - return self # type: ignore[return-value] - - def __ior__(self, other: set[str], /) -> set[str]: - # In-place union should update both list and set. - self._set |= other - # Elements were only added. - self._list.extend(element for element in sorted(self._set - set(self._list))) - return self # type: ignore[return-value] - - def __iter__(self) -> Iterator[str]: - return iter(self._list) +""" +Constants you might want to modify +----------------------------------------------------------------------------- +""" - def __len__(self) -> int: - # Length of the list, for backwards compatibility. - # If used as a set, both lengths will be the same. - return len(self._list) - - def __or__(self, value: set[str], /) -> set[str]: - # Using `|` means user expects a set back. - return self._set | value - - def __rand__(self, value: set[str], /) -> set[str]: - # Using `&` means user expects a set back. - return value & self._set - - def __ror__(self, value: set[str], /) -> set[str]: - # Using `|` means user expects a set back. - return value | self._set - - def __rsub__(self, value: set[str], /) -> set[str]: - # Using `-` means user expects a set back. - return value - self._set - - def __rxor__(self, value: set[str], /) -> set[str]: - # Using `^` means user expects a set back. - return value ^ self._set - - def __sub__(self, value: set[str], /) -> set[str]: - # Using `-` means user expects a set back. - return self._set - value - - def __xor__(self, value: set[str], /) -> set[str]: - # Using `^` means user expects a set back. 
- return self._set ^ value - - @deprecated("Using block level elements as a list is deprecated, use it as a set instead.") - def __reversed__(self) -> Iterator[str]: - return reversed(self._list) - - @deprecated("Using block level elements as a list is deprecated, use it as a set instead.") - def __setitem__(self, index: int, value: str, /) -> None: - # In-place item-setting should update both list and set. - old = self._list[index] - self._list[index] = value - # Only remove from set if absent from list. - if old not in self._list: - self._set.discard(old) - self._set.add(value) - - def __str__(self) -> str: - return str(self._set) - - def add(self, element: str, /) -> None: - # In-place addition should update both list and set. - self._set.add(element) - self._list.append(element) - - @deprecated("Using block level elements as a list is deprecated, use it as a set instead.") - def append(self, element: str, /) -> None: - # In-place addition should update both list and set. - self._list.append(element) - self._set.add(element) - - def clear(self) -> None: - self._list.clear() - self._set.clear() - - def copy(self) -> _BlockLevelElements: - # We're not sure yet whether the user wants to use it as a set or list. - return _BlockLevelElements(self._list) - - @deprecated("Using block level elements as a list is deprecated, use it as a set instead.") - def count(self, value: str, /) -> int: - # Count in list, for backwards compatibility. - # If used as a set, both counts will be the same (1). - return self._list.count(value) - - def difference(self, *others: set[str]) -> set[str]: - # User expects a set back. - return self._set.difference(*others) - - def difference_update(self, *others: set[str]) -> None: - # In-place difference should update both list and set. - self._set.difference_update(*others) - # Elements were only removed. 
- self._list[:] = [element for element in self._list if element in self._set] - - def discard(self, element: str, /) -> None: - # In-place discard should update both list and set. - self._set.discard(element) - try: - self._list.remove(element) - except ValueError: - pass - - @deprecated("Using block level elements as a list is deprecated, use it as a set instead.") - def extend(self, elements: list[str], /) -> None: - # In-place extension should update both list and set. - self._list.extend(elements) - self._set.update(elements) - - @deprecated("Using block level elements as a list is deprecated, use it as a set instead.") - def index(self, value, start: int = 0, stop: int = sys.maxsize, /): - return self._list.index(value, start, stop) - - @deprecated("Using block level elements as a list is deprecated, use it as a set instead.") - def insert(self, index: int, element: str, /) -> None: - # In-place insertion should update both list and set. - self._list.insert(index, element) - self._set.add(element) - - def intersection(self, *others: set[str]) -> set[str]: - # User expects a set back. - return self._set.intersection(*others) - - def intersection_update(self, *others: set[str]) -> None: - # In-place intersection should update both list and set. - self._set.intersection_update(*others) - # Elements were only removed. - self._list[:] = [element for element in self._list if element in self._set] - - def isdisjoint(self, other: set[str], /) -> bool: - return self._set.isdisjoint(other) - - def issubset(self, other: set[str], /) -> bool: - return self._set.issubset(other) - - def issuperset(self, other: set[str], /) -> bool: - return self._set.issuperset(other) - - def pop(self, index: int | None = None, /) -> str: - # In-place pop should update both list and set. 
- if index is None: - index = -1 - else: - warnings.warn( - "Using block level elements as a list is deprecated, use it as a set instead.", - DeprecationWarning, - ) - element = self._list.pop(index) - # Only remove from set if absent from list. - if element not in self._list: - self._set.remove(element) - return element - - def remove(self, element: str, /) -> None: - # In-place removal should update both list and set. - # We give precedence to set behavior, so we remove all occurrences from the list. - while True: - try: - self._list.remove(element) - except ValueError: - break - self._set.remove(element) - - @deprecated("Using block level elements as a list is deprecated, use it as a set instead.") - def reverse(self) -> None: - self._list.reverse() - - @deprecated("Using block level elements as a list is deprecated, use it as a set instead.") - def sort(self, /, *, key: Callable | None = None, reverse: bool = False) -> None: - self._list.sort(key=key, reverse=reverse) - - def symmetric_difference(self, other: set[str], /) -> set[str]: - # User expects a set back. - return self._set.symmetric_difference(other) - - def symmetric_difference_update(self, other: set[str], /) -> None: - # In-place symmetric difference should update both list and set. - self._set.symmetric_difference_update(other) - # Elements were both removed and added. - self._list[:] = [element for element in self._list if element in self._set] - self._list.extend(element for element in sorted(self._set - set(self._list))) - - def union(self, *others: set[str]) -> set[str]: - # User expects a set back. - return self._set.union(*others) - - def update(self, *others: set[str]) -> None: - # In-place union should update both list and set. - self._set.update(*others) - # Elements were only added. 
- self._list.extend(element for element in sorted(self._set - set(self._list))) - - -# Constants you might want to modify -# ----------------------------------------------------------------------------- -# Type it as `set[str]` to express our intent for it to be used as such. -# We explicitly lie here, so that users running type checkers will get -# warnings when they use the container as a list. This is a very effective -# way of communicating the change, and deprecating list-like usage. -BLOCK_LEVEL_ELEMENTS: set[str] = _BlockLevelElements([ +BLOCK_LEVEL_ELEMENTS: list[str] = [ # Elements which are invalid to wrap in a `

` tag. # See https://w3c.github.io/html/grouping-content.html#the-p-element 'address', 'article', 'aside', 'blockquote', 'details', 'div', 'dl', @@ -326,9 +56,9 @@ def update(self, *others: set[str]) -> None: 'math', 'map', 'noscript', 'output', 'object', 'option', 'progress', 'script', 'style', 'summary', 'tbody', 'td', 'textarea', 'tfoot', 'th', 'thead', 'tr', 'video', 'center' -]) # type: ignore[assignment] +] """ -Set of HTML tags which get treated as block-level elements. Same as the `block_level_elements` +List of HTML tags which get treated as block-level elements. Same as the `block_level_elements` attribute of the [`Markdown`][markdown.Markdown] class. Generally one should use the attribute on the class. This remains for compatibility with older extensions. """ @@ -381,6 +111,31 @@ def get_installed_extensions(): return metadata.entry_points(group='markdown.extensions') +def deprecated(message: str, stacklevel: int = 2): + """ + Raise a [`DeprecationWarning`][] when wrapped function/method is called. + + Usage: + + ```python + @deprecated("This method will be removed in version X; use Y instead.") + def some_method(): + pass + ``` + """ + def wrapper(func): + @wraps(func) + def deprecated_func(*args, **kwargs): + warnings.warn( + f"'{func.__name__}' is deprecated. {message}", + category=DeprecationWarning, + stacklevel=stacklevel + ) + return func(*args, **kwargs) + return deprecated_func + return wrapper + + def parseBoolValue(value: str | None, fail_on_errors: bool = True, preserve_none: bool = False) -> bool | None: """Parses a string representing a boolean value. If parsing was successful, returns `True` or `False`. If `preserve_none=True`, returns `True`, `False`, diff --git a/tests/test_apis.py b/tests/test_apis.py index efd6a23cb..55e2cdb66 100644 --- a/tests/test_apis.py +++ b/tests/test_apis.py @@ -920,7 +920,7 @@ class TestBlockAppend(unittest.TestCase): def testBlockAppend(self): """ Test that appended escapes are only in the current instance. 
""" md = markdown.Markdown() - md.block_level_elements.add('test') + md.block_level_elements.append('test') self.assertEqual('test' in md.block_level_elements, True) md2 = markdown.Markdown() self.assertEqual('test' not in md2.block_level_elements, True) diff --git a/tests/test_block_level_elements.py b/tests/test_block_level_elements.py deleted file mode 100644 index d8609c446..000000000 --- a/tests/test_block_level_elements.py +++ /dev/null @@ -1,482 +0,0 @@ -""" -Python Markdown - -A Python implementation of John Gruber's Markdown. - -Documentation: https://python-markdown.github.io/ -GitHub: https://github.com/Python-Markdown/markdown/ -PyPI: https://pypi.org/project/Markdown/ - -Started by Manfred Stienstra (http://www.dwerg.net/). -Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org). -Currently maintained by Waylan Limberg (https://github.com/waylan), -Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser). - -Copyright 2007-2023 The Python Markdown Project (v. 1.7 and later) -Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b) -Copyright 2004 Manfred Stienstra (the original version) - -License: BSD (see LICENSE.md for details). - -Tests for block level elements. -=============================== - -Tests specific to the hybrid list/set container for block level elements. - -The hybrid list/set container exists for backwards compatibility reasons, -to support using both the `BLOCK_LEVEL_ELEMENTS` global variable (soft-deprecated) -and the `Markdown.block_level_elements` instance attribute (preferred) as a list or a set. -When we stop supporting list methods on these objects, we can remove the container -as well as this test module. -""" - -import unittest - -from markdown.util import _BlockLevelElements - - -class TestBlockLevelElements(unittest.TestCase): - """ Tests for the block level elements container. 
""" - - def test__init__(self): - ble = _BlockLevelElements([]) - self.assertEqual(ble._list, []) - self.assertEqual(ble._set, set()) - - def test__init__duplicates(self): - ble = _BlockLevelElements(["a", "a", "b"]) - self.assertEqual(ble._list, ["a", "a", "b"]) - self.assertEqual(ble._set, {"a", "b"}) - - def test___add__(self): - ble = _BlockLevelElements(["a", "b"]) - with self.assertWarns(DeprecationWarning): - ble2 = ble + ["c", "d"] - self.assertIsInstance(ble2, list) - self.assertEqual(ble2, ["a", "b", "c", "d"]) - - def test___add__duplicates(self): - ble = _BlockLevelElements(["a", "b"]) - with self.assertWarns(DeprecationWarning): - ble2 = ble + ["a", "b"] - self.assertIsInstance(ble2, list) - self.assertEqual(ble2, ["a", "b", "a", "b"]) - - def test___and__(self): - ble = _BlockLevelElements(["a", "b"]) - ble2 = ble & {"b", "c"} - self.assertIsInstance(ble2, set) - self.assertEqual(ble2, {"b"}) - - def test___contains__(self): - ble = _BlockLevelElements(["a", "b"]) - self.assertIn("a", ble) - self.assertNotIn("c", ble) - - def test___delitem__(self): - ble = _BlockLevelElements(["a", "b", "c"]) - with self.assertWarns(DeprecationWarning): - del ble[0] - self.assertEqual(ble._list, ["b", "c"]) - self.assertEqual(ble._set, {"b", "c"}) - with self.assertWarns(DeprecationWarning): - del ble[1] - self.assertEqual(ble._list, ["b"]) - self.assertEqual(ble._set, {"b"}) - with self.assertWarns(DeprecationWarning): - self.assertRaises(IndexError, ble.__delitem__, 10) - - def test___delitem__duplicates(self): - ble = _BlockLevelElements(["a", "a", "b"]) - with self.assertWarns(DeprecationWarning): - del ble[0] - self.assertEqual(ble._list, ["a", "b"]) - self.assertEqual(ble._set, {"a", "b"}) - - def test___getitem__(self): - ble = _BlockLevelElements(["a", "b", "c"]) - with self.assertWarns(DeprecationWarning): - self.assertEqual(ble[0], "a") - self.assertEqual(ble[1], "b") - self.assertEqual(ble[2], "c") - self.assertRaises(IndexError, ble.__getitem__, 10) - - 
def test___getitem__duplicates(self): - ble = _BlockLevelElements(["a", "a", "b"]) - with self.assertWarns(DeprecationWarning): - self.assertEqual(ble[0], "a") - self.assertEqual(ble[1], "a") - self.assertEqual(ble[2], "b") - - def test___iadd__(self): - ble = _BlockLevelElements(["a", "b"]) - with self.assertWarns(DeprecationWarning): - ble += ["c", "d"] - self.assertEqual(ble._list, ["a", "b", "c", "d"]) - self.assertEqual(ble._set, {"a", "b", "c", "d"}) - - def test___iadd__duplicates(self): - ble = _BlockLevelElements(["a", "b"]) - with self.assertWarns(DeprecationWarning): - ble += ["a", "b"] - self.assertEqual(ble._list, ["a", "b", "a", "b"]) - self.assertEqual(ble._set, {"a", "b"}) - - def test___iand__(self): - ble = _BlockLevelElements(["a", "b"]) - ble &= {"b", "c"} - self.assertEqual(ble._list, ["b"]) - self.assertEqual(ble._set, {"b"}) - - def test___ior__(self): - ble = _BlockLevelElements(["a", "b"]) - ble |= {"b", "c"} - self.assertEqual(ble._list, ["a", "b", "c"]) - self.assertEqual(ble._set, {"a", "b", "c"}) - - def test___iter__(self): - ble = _BlockLevelElements(["a", "b", "c"]) - self.assertEqual(tuple(ble), ("a", "b", "c")) - - def test___iter__duplicates(self): - ble = _BlockLevelElements(["a", "a", "b"]) - self.assertEqual(tuple(ble), ("a", "a", "b")) - - def test___len__(self): - self.assertEqual(len(_BlockLevelElements([])), 0) - self.assertEqual(len(_BlockLevelElements(["a", "b"])), 2) - self.assertEqual(len(_BlockLevelElements(["a", "b", "c"])), 3) - - def test___len__duplicates(self): - self.assertEqual(len(_BlockLevelElements(["a", "a"])), 2) - self.assertEqual(len(_BlockLevelElements(["a", "a", "b"])), 3) - - def test___or__(self): - ble = _BlockLevelElements(["a", "b"]) - ble2 = ble | {"b", "c"} - self.assertIsInstance(ble2, set) - self.assertEqual(ble2, {"a", "b", "c"}) - - def test___rand__(self): - ble = _BlockLevelElements(["a", "b"]) - ble2 = {"b", "c"} & ble - self.assertIsInstance(ble2, set) - self.assertEqual(ble2, {"b"}) - - 
def test___ror__(self):
    """`set | container` returns a `set` holding the union."""
    elements = _BlockLevelElements(["a", "b"])
    result = {"b", "c"} | elements
    self.assertIsInstance(result, set)
    self.assertEqual(result, {"a", "b", "c"})

def test___rsub__(self):
    """`set - container` returns a `set` holding the left-hand leftovers."""
    elements = _BlockLevelElements(["a", "b"])
    result = {"b", "c"} - elements
    self.assertIsInstance(result, set)
    self.assertEqual(result, {"c"})

def test___rxor__(self):
    """`set ^ container` returns a `set` holding the symmetric difference."""
    elements = _BlockLevelElements(["a", "b"])
    result = {"b", "c"} ^ elements
    self.assertIsInstance(result, set)
    self.assertEqual(result, {"a", "c"})

def test___sub__(self):
    """`container - set` returns a `set` holding the difference."""
    elements = _BlockLevelElements(["a", "b"])
    result = elements - {"b", "c"}
    self.assertIsInstance(result, set)
    self.assertEqual(result, {"a"})

def test___xor__(self):
    """`container ^ set` returns a `set` holding the symmetric difference."""
    elements = _BlockLevelElements(["a", "b"])
    result = elements ^ {"b", "c"}
    self.assertIsInstance(result, set)
    self.assertEqual(result, {"a", "c"})

def test___reversed__(self):
    """`reversed()` warns as deprecated and yields the tags back to front."""
    elements = _BlockLevelElements(["a", "b", "c"])
    with self.assertWarns(DeprecationWarning):
        self.assertEqual(tuple(reversed(elements)), ("c", "b", "a"))

def test___reversed__duplicates(self):
    """`reversed()` keeps duplicated tags while reversing."""
    elements = _BlockLevelElements(["a", "a", "b"])
    with self.assertWarns(DeprecationWarning):
        self.assertEqual(tuple(reversed(elements)), ("b", "a", "a"))

def test___setitem__(self):
    """Item assignment warns as deprecated and updates `_list` and `_set`."""
    elements = _BlockLevelElements(["a", "b", "c"])
    with self.assertWarns(DeprecationWarning):
        elements[0] = "d"
    self.assertEqual(elements._list, ["d", "b", "c"])
    self.assertEqual(elements._set, {"d", "b", "c"})
    with self.assertWarns(DeprecationWarning):
        elements[1] = "e"
    self.assertEqual(elements._list, ["d", "e", "c"])
    self.assertEqual(elements._set, {"d", "e", "c"})
    # An out-of-range index must still raise, and still warn.
    with self.assertWarns(DeprecationWarning):
        self.assertRaises(IndexError, elements.__setitem__, 10, "f")

def test___setitem__duplicates(self):
    """Overwriting one of several duplicates only drops the tag from `_set` once none remain."""
    elements = _BlockLevelElements(["a", "a", "b"])
    with self.assertWarns(DeprecationWarning):
        elements[0] = "b"
    self.assertEqual(elements._list, ["b", "a", "b"])
    self.assertEqual(elements._set, {"a", "b"})
    with self.assertWarns(DeprecationWarning):
        elements[1] = "b"
    self.assertEqual(elements._list, ["b", "b", "b"])
    self.assertEqual(elements._set, {"b"})

def test___str__(self):
    """`str()` renders the container like a set."""
    elements = _BlockLevelElements(["a"])
    self.assertEqual(str(elements), "{'a'}")

def test_add(self):
    """`add()` stores a new tag in both `_list` and `_set`."""
    elements = _BlockLevelElements(["a", "b"])
    elements.add("c")
    self.assertEqual(elements._list, ["a", "b", "c"])
    self.assertEqual(elements._set, {"a", "b", "c"})

def test_add_duplicates(self):
    """`add()` of an existing tag appends to `_list` but leaves `_set` unchanged."""
    elements = _BlockLevelElements(["a", "b"])
    elements.add("a")
    self.assertEqual(elements._list, ["a", "b", "a"])
    self.assertEqual(elements._set, {"a", "b"})

def test_append(self):
    """`append()` warns as deprecated and stores the new tag."""
    elements = _BlockLevelElements(["a", "b"])
    with self.assertWarns(DeprecationWarning):
        elements.append("c")
    self.assertEqual(elements._list, ["a", "b", "c"])
    self.assertEqual(elements._set, {"a", "b", "c"})

def test_append_duplicates(self):
    """`append()` of an existing tag duplicates it in `_list` only."""
    elements = _BlockLevelElements(["a", "b"])
    with self.assertWarns(DeprecationWarning):
        elements.append("a")
    self.assertEqual(elements._list, ["a", "b", "a"])
    self.assertEqual(elements._set, {"a", "b"})

def test_clear(self):
    """`clear()` empties both `_list` and `_set`."""
    elements = _BlockLevelElements(["a", "b"])
    elements.clear()
    self.assertEqual(elements._list, [])
    self.assertEqual(elements._set, set())

def test_copy(self):
    """`copy()` returns a distinct object with equal contents."""
    elements = _BlockLevelElements(["a", "b"])
    clone = elements.copy()
    self.assertIsNot(clone, elements)
    self.assertEqual(clone._list, ["a", "b"])
    self.assertEqual(clone._set, {"a", "b"})

def test_copy_duplicates(self):
    """`copy()` carries duplicated tags over to the new object's `_list`."""
    elements = _BlockLevelElements(["a", "a"])
    clone = elements.copy()
    self.assertIsNot(clone, elements)
    self.assertEqual(clone._list, ["a", "a"])
    self.assertEqual(clone._set, {"a"})

def test_count(self):
    """`count()` warns as deprecated and reports occurrences per tag."""
    elements = _BlockLevelElements(["a", "b"])
    with self.assertWarns(DeprecationWarning):
        for tag, occurrences in (("a", 1), ("b", 1), ("c", 0)):
            self.assertEqual(elements.count(tag), occurrences)

def test_count_duplicates(self):
    """`count()` counts every duplicated occurrence."""
    elements = _BlockLevelElements(["a", "a"])
    with self.assertWarns(DeprecationWarning):
        self.assertEqual(elements.count("a"), 2)

def test_difference(self):
    """`difference()` returns a `set` without the tags of the other set."""
    elements = _BlockLevelElements(["a", "b"])
    result = elements.difference({"b", "c"})
    self.assertIsInstance(result, set)
    self.assertEqual(result, {"a"})

def test_difference_update(self):
    """`difference_update()` removes the other set's tags in place."""
    elements = _BlockLevelElements(["a", "b"])
    elements.difference_update({"b", "c"})
    self.assertEqual(elements._list, ["a"])
    self.assertEqual(elements._set, {"a"})

def test_discard(self):
    """`discard()` removes a tag and is silent when the tag is absent."""
    elements = _BlockLevelElements(["a", "b"])
    elements.discard("b")
    elements.discard("b")  # Second call must not raise.
    self.assertEqual(elements._list, ["a"])
    self.assertEqual(elements._set, {"a"})

def test_extend(self):
    """`extend()` warns as deprecated and stores all new tags."""
    elements = _BlockLevelElements(["a", "b"])
    with self.assertWarns(DeprecationWarning):
        elements.extend(["c", "d"])
    self.assertEqual(elements._list, ["a", "b", "c", "d"])
    self.assertEqual(elements._set, {"a", "b", "c", "d"})

def test_extend_duplicates(self):
    """`extend()` with existing tags duplicates them in `_list` only."""
    elements = _BlockLevelElements(["a", "b"])
    with self.assertWarns(DeprecationWarning):
        elements.extend(["a", "b"])
    self.assertEqual(elements._list, ["a", "b", "a", "b"])
    self.assertEqual(elements._set, {"a", "b"})

def test_index(self):
    """`index()` warns as deprecated on every call and raises for unknown tags."""
    elements = _BlockLevelElements(["a", "b", "c"])
    for tag, position in (("a", 0), ("b", 1), ("c", 2)):
        with self.assertWarns(DeprecationWarning):
            self.assertEqual(elements.index(tag), position)
    with self.assertWarns(DeprecationWarning):
        self.assertRaises(ValueError, elements.index, "d")

def test_index_duplicates(self):
    """`index()` finds the first duplicate and honours the start offset."""
    elements = _BlockLevelElements(["a", "b", "b"])
    with self.assertWarns(DeprecationWarning):
        self.assertEqual(elements.index("b"), 1)
    with self.assertWarns(DeprecationWarning):
        self.assertEqual(elements.index("b", 2), 2)

def test_insert(self):
    """`insert()` warns as deprecated and accepts out-of-range positions."""
    elements = _BlockLevelElements(["a", "b", "c"])
    with self.assertWarns(DeprecationWarning):
        elements.insert(1, "d")
    self.assertEqual(elements._list, ["a", "d", "b", "c"])
    self.assertEqual(elements._set, {"a", "b", "c", "d"})
    with self.assertWarns(DeprecationWarning):
        elements.insert(100, "e")
    self.assertIn("e", elements)

def test_insert_duplicates(self):
    """`insert()` of an existing tag duplicates it in `_list` only."""
    elements = _BlockLevelElements(["a", "a", "b"])
    with self.assertWarns(DeprecationWarning):
        elements.insert(1, "b")
    self.assertEqual(elements._list, ["a", "b", "a", "b"])
    self.assertEqual(elements._set, {"a", "b"})

def test_intersection(self):
    """`intersection()` returns a `set` of the shared tags."""
    elements = _BlockLevelElements(["a", "b"])
    result = elements.intersection({"b", "c"})
    self.assertIsInstance(result, set)
    self.assertEqual(result, {"b"})

def test_intersection_update(self):
    """`intersection_update()` keeps only the shared tags, in place."""
    elements = _BlockLevelElements(["a", "b"])
    elements.intersection_update({"b", "c"})
    self.assertEqual(elements._list, ["b"])
    self.assertEqual(elements._set, {"b"})

def test_isdisjoint(self):
    """`isdisjoint()` is false with a shared tag and true without one."""
    elements = _BlockLevelElements(["a", "b"])
    self.assertFalse(elements.isdisjoint({"b", "c"}))
    self.assertTrue(elements.isdisjoint({"c", "d"}))

def test_issubset(self):
    """`issubset()` is true only when every tag is in the other set."""
    elements = _BlockLevelElements(["a", "b"])
    self.assertTrue(elements.issubset({"a", "b", "c"}))
    self.assertFalse(elements.issubset({"a", "c"}))

def test_issuperset(self):
    """`issuperset()` is true only when the other set's tags are all present."""
    elements = _BlockLevelElements(["a", "b"])
    self.assertTrue(elements.issuperset({"a"}))
    self.assertTrue(elements.issuperset({"a", "b"}))
    self.assertFalse(elements.issuperset({"a", "c"}))

def test_pop(self):
    """`pop()` takes the last tag; `pop(index)` additionally warns as deprecated."""
    elements = _BlockLevelElements(["a", "b", "c"])
    self.assertEqual(elements.pop(), "c")
    self.assertEqual(elements._list, ["a", "b"])
    self.assertEqual(elements._set, {"a", "b"})
    with self.assertWarns(DeprecationWarning):
        self.assertEqual(elements.pop(0), "a")
    self.assertEqual(elements._list, ["b"])
    self.assertEqual(elements._set, {"b"})
    # An out-of-range index must still raise, and still warn.
    with self.assertWarns(DeprecationWarning):
        self.assertRaises(IndexError, elements.pop, 10)

def test_pop_duplicates(self):
    """Popping one duplicate keeps the tag in `_set` until its last copy is gone."""
    elements = _BlockLevelElements(["a", "a", "b", "b"])
    self.assertEqual(elements.pop(), "b")
    self.assertEqual(elements._list, ["a", "a", "b"])
    self.assertEqual(elements._set, {"a", "b"})
    with self.assertWarns(DeprecationWarning):
        self.assertEqual(elements.pop(0), "a")
    self.assertEqual(elements._list, ["a", "b"])
    self.assertEqual(elements._set, {"a", "b"})
    self.assertEqual(elements.pop(), "b")
    self.assertEqual(elements._list, ["a"])
    self.assertEqual(elements._set, {"a"})

def test_remove(self):
    """`remove()` drops a tag and raises `KeyError` for an unknown one."""
    elements = _BlockLevelElements(["a", "b", "c"])
    elements.remove("b")
    self.assertEqual(elements._list, ["a", "c"])
    self.assertEqual(elements._set, {"a", "c"})
    self.assertRaises(KeyError, elements.remove, "d")

def test_remove_duplicates(self):
    """`remove()` drops every copy of a duplicated tag."""
    elements = _BlockLevelElements(["a", "a", "b"])
    elements.remove("a")
    self.assertEqual(elements._list, ["b"])
    self.assertEqual(elements._set, {"b"})

def test_reverse(self):
    """`reverse()` warns as deprecated and reverses `_list` in place."""
    elements = _BlockLevelElements(["a", "b", "c"])
    with self.assertWarns(DeprecationWarning):
        elements.reverse()
    self.assertEqual(elements._list, ["c", "b", "a"])
    self.assertEqual(elements._set, {"a", "b", "c"})

def test_reverse_duplicates(self):
    """`reverse()` keeps duplicated tags while reversing `_list`."""
    elements = _BlockLevelElements(["a", "a", "b"])
    with self.assertWarns(DeprecationWarning):
        elements.reverse()
    self.assertEqual(elements._list, ["b", "a", "a"])
    self.assertEqual(elements._set, {"a", "b"})

def test_sort(self):
    """`sort()` warns as deprecated and sorts `_list` in place."""
    elements = _BlockLevelElements(["c", "a", "b"])
    with self.assertWarns(DeprecationWarning):
        elements.sort()
    self.assertEqual(elements._list, ["a", "b", "c"])
    self.assertEqual(elements._set, {"a", "b", "c"})

def test_sort_duplicates(self):
    """`sort()` keeps duplicated tags while sorting `_list`."""
    elements = _BlockLevelElements(["b", "a", "b"])
    with self.assertWarns(DeprecationWarning):
        elements.sort()
    self.assertEqual(elements._list, ["a", "b", "b"])
    self.assertEqual(elements._set, {"a", "b"})

def test_symmetric_difference(self):
    """`symmetric_difference()` returns a `set` of the non-shared tags."""
    elements = _BlockLevelElements(["a", "b"])
    result = elements.symmetric_difference({"b", "c"})
    self.assertIsInstance(result, set)
    self.assertEqual(result, {"a", "c"})

def test_symmetric_difference_update(self):
    """`symmetric_difference_update()` keeps only the non-shared tags, in place."""
    elements = _BlockLevelElements(["a", "b"])
    elements.symmetric_difference_update({"b", "c"})
    self.assertEqual(elements._list, ["a", "c"])
    self.assertEqual(elements._set, {"a", "c"})

def test_union(self):
    """`union()` returns a `set` holding the tags of both operands."""
    elements = _BlockLevelElements(["a", "b"])
    result = elements.union({"b", "c"})
    self.assertIsInstance(result, set)
    self.assertEqual(result, {"a", "b", "c"})

def test_update(self):
    """`update()` adds only the tags that were missing."""
    elements = _BlockLevelElements(["a", "b"])
    elements.update({"b", "c"})
    self.assertEqual(elements._list, ["a", "b", "c"])
    self.assertEqual(elements._set, {"a", "b", "c"})

# Special tests
def test_isinstance(self):
    """An instance passes `isinstance` checks for its own class and for `list`."""
    elements = _BlockLevelElements([])
    self.assertIsInstance(elements, _BlockLevelElements)
    self.assertIsInstance(elements, list)