diff --git a/docs/change_log/index.md b/docs/change_log/index.md index fffa216ca..a47afecc7 100644 --- a/docs/change_log/index.md +++ b/docs/change_log/index.md @@ -7,6 +7,7 @@ Under development: version 3.3.3 (a bug-fix release). * Unify all block-level tags (#1047). * Fix issue where some empty elements would have text rendered as `None` when using `md_in_html` (#1049). +* Avoid catastrophic backtracking in `hr` regex (#1055). Oct 19, 2020: version 3.3.2 (a bug-fix release). diff --git a/markdown/blockprocessors.py b/markdown/blockprocessors.py index 742f17470..7d31a7ffa 100644 --- a/markdown/blockprocessors.py +++ b/markdown/blockprocessors.py @@ -496,16 +496,15 @@ def run(self, parent, blocks): class HRProcessor(BlockProcessor): """ Process Horizontal Rules. """ - RE = r'^[ ]{0,3}((-+[ ]{0,2}){3,}|(_+[ ]{0,2}){3,}|(\*+[ ]{0,2}){3,})[ ]*$' + # Python's re module doesn't officially support atomic grouping. However you can fake it. + # See https://stackoverflow.com/a/13577411/866026 + RE = r'^[ ]{0,3}(?=(?P<atomicgroup>(-+[ ]{0,2}){3,}|(_+[ ]{0,2}){3,}|(\*+[ ]{0,2}){3,}))(?P=atomicgroup)[ ]*$' # Detect hr on any line of a block. SEARCH_RE = re.compile(RE, re.MULTILINE) def test(self, parent, block): m = self.SEARCH_RE.search(block) - # No atomic grouping in python so we simulate it here for performance. - # The regex only matches what would be in the atomic group - the HR. - # Then check if we are at end of block or if next char is a newline. - if m and (m.end() == len(block) or block[m.end()] == '\n'): + if m: # Save match object on class instance so we can use it later. self.match = m return True diff --git a/tests/test_syntax/blocks/test_hr.py b/tests/test_syntax/blocks/test_hr.py index 009a39d46..85a51b32f 100644 --- a/tests/test_syntax/blocks/test_hr.py +++ b/tests/test_syntax/blocks/test_hr.py @@ -377,3 +377,26 @@ def test_not_hr_2_underscores_spaces(self): '

<p>_ _</p>

' ) + + def test_2_consecutive_hr(self): + self.assertMarkdownRenders( + self.dedent( + """ + - - - + - - - + """ + ), + self.dedent( + """ + <hr />
+ <hr />
+ """ + ) + ) + + def test_not_hr_end_in_char(self): + self.assertMarkdownRenders( + '--------------------------------------c', + + '

<p>--------------------------------------c</p>

' + )