diff --git a/docs/changelog.md b/docs/changelog.md index 34817bdd..17829a61 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -15,6 +15,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 * DRY fix in `abbr` extension by introducing method `create_element` (#1483). * Clean up test directory some removing some redundant tests and port non-redundant cases to the newer test framework. +* Improved performance of the raw HTML post-processor (#1510). ### Fixed diff --git a/markdown/postprocessors.py b/markdown/postprocessors.py index 7f5ede90..d4b0e1fd 100644 --- a/markdown/postprocessors.py +++ b/markdown/postprocessors.py @@ -28,7 +28,6 @@ from __future__ import annotations -from collections import OrderedDict from typing import TYPE_CHECKING, Any from . import util import re @@ -73,37 +72,26 @@ class RawHtmlPostprocessor(Postprocessor): def run(self, text: str) -> str: """ Iterate over html stash and restore html. """ - replacements = OrderedDict() - for i in range(self.md.htmlStash.html_counter): - html = self.stash_to_string(self.md.htmlStash.rawHtmlBlocks[i]) - if self.isblocklevel(html): - replacements["

{}

".format( - self.md.htmlStash.get_placeholder(i))] = html - replacements[self.md.htmlStash.get_placeholder(i)] = html - def substitute_match(m: re.Match[str]) -> str: - key = m.group(0) - - if key not in replacements: - if key[3:-4] in replacements: - return f'

{ replacements[key[3:-4]] }

' - else: - return key - - return replacements[key] - - if replacements: + if key := m.group(1): + wrapped = True + else: + key = m.group(2) + wrapped = False + if (key := int(key)) >= self.md.htmlStash.html_counter: + return m.group(0) + html = self.stash_to_string(self.md.htmlStash.rawHtmlBlocks[key]) + if not wrapped or self.isblocklevel(html): + return pattern.sub(substitute_match, html) + return pattern.sub(substitute_match, f"

{html}

") + + if self.md.htmlStash.html_counter: base_placeholder = util.HTML_PLACEHOLDER % r'([0-9]+)' pattern = re.compile(f'

{ base_placeholder }

|{ base_placeholder }') - processed_text = pattern.sub(substitute_match, text) + return pattern.sub(substitute_match, text) else: return text - if processed_text == text: - return processed_text - else: - return self.run(processed_text) - def isblocklevel(self, html: str) -> bool: """ Check is block of HTML is block-level. """ m = self.BLOCK_LEVEL_REGEX.match(html)