25
25
from . import Extension
26
26
from ..blockprocessors import BlockProcessor
27
27
from ..inlinepatterns import InlineProcessor
28
- from ..util import AtomicString
28
+ from ..treeprocessors import Treeprocessor
29
+ from ..util import AtomicString , deprecated
30
+ from typing import TYPE_CHECKING
29
31
import re
30
32
import xml .etree .ElementTree as etree
31
33
34
+ if TYPE_CHECKING : # pragma: no cover
35
+ from .. import Markdown
36
+ from ..blockparsers import BlockParser
37
+
32
38
33
39
class AbbrExtension(Extension):
    """ Abbreviation Extension for Python-Markdown. """

    def __init__(self, **kwargs):
        """ Set up the extension and create the (initially empty) abbreviation store. """
        super().__init__(**kwargs)
        # Maps abbreviation text to its title. The very same dict object is
        # handed to both processors in `extendMarkdown`.
        self.abbrs = dict()

    def reset(self):
        """ Forget every abbreviation collected so far. """
        # Emptied in place (not rebound) so the processors holding a reference
        # to this dict see the change as well.
        self.abbrs.clear()

    def extendMarkdown(self, md):
        """ Register this extension plus its `AbbrTreeprocessor` and `AbbrBlockprocessor`. """
        md.registerExtension(self)

        tree_processor = AbbrTreeprocessor(md, self.abbrs)
        md.treeprocessors.register(tree_processor, 'abbr', 7)

        block_processor = AbbrBlockprocessor(md.parser, self.abbrs)
        md.parser.blockprocessors.register(block_processor, 'abbr', 16)
56
+
57
+
58
class AbbrTreeprocessor(Treeprocessor):
    """ Replace abbreviation text with `<abbr>` elements. """

    def __init__(self, md: Markdown | None = None, abbrs: dict | None = None):
        """
        Initialize the processor.

        Arguments:
            md: Passed through to the `Treeprocessor` initializer.
            abbrs: Mapping of abbreviation text to its title. The mapping is
                stored as is (not copied). A new empty dict is used when omitted.
        """
        self.abbrs: dict = abbrs if abbrs is not None else {}
        # Compiled by `run` from the current keys of `self.abbrs`.
        self.RE: re.Pattern[str] | None = None
        super().__init__(md)

    def _create_abbr(self, match: re.Match[str], tail: str) -> etree.Element:
        ''' Build an `abbr` element for `match`, followed by the text `tail`. '''
        abbr = etree.Element('abbr', {'title': self.abbrs[match.group(0)]})
        # The abbreviation itself is wrapped in `AtomicString`, as before.
        abbr.text = AtomicString(match.group(0))
        abbr.tail = tail
        return abbr

    def iter_element(self, el: etree.Element, parent: etree.Element | None = None) -> None:
        '''
        Recursively iterate over elements, run regex on text and wrap matches in `abbr` tags.

        Arguments:
            el: The element whose children, text and tail are processed.
            parent: The parent of `el`, or `None` for the root. The tail of `el`
                is only processed when a parent is given, because the new
                `abbr` elements are inserted into the parent.
        '''
        # Children are visited last to first so that `abbr` elements inserted
        # after a child (for matches in its tail) sit at positions which have
        # already been visited.
        for child in reversed(el):
            self.iter_element(child, el)
        text = el.text
        # Text marked as `AtomicString` is left untouched.
        if text and not isinstance(text, AtomicString):
            # Matches are handled last to first: each new element is inserted
            # at index 0 and `text` is cut back to what precedes the match.
            for m in reversed(list(self.RE.finditer(text))):
                el.insert(0, self._create_abbr(m, text[m.end():]))
                text = text[:m.start()]
            el.text = text
        # Compare against `None` explicitly: the truth value of an `Element`
        # depends on its child count and truth-testing elements is deprecated.
        if parent is not None and el.tail:
            tail = el.tail
            index = list(parent).index(el) + 1
            for m in reversed(list(self.RE.finditer(tail))):
                parent.insert(index, self._create_abbr(m, tail[m.end():]))
                tail = tail[:m.start()]
            el.tail = tail

    def run(self, root: etree.Element) -> etree.Element | None:
        '''
        Step through tree to find known abbreviations.

        The tree is modified in place. Always returns `None`.
        '''
        if not self.abbrs:
            # No abbreviations defined. Skip running processor.
            return None
        # Build and compile regex. Alternatives are tried left to right, so the
        # longest abbreviations go first; otherwise `HTML` would win over
        # `HTML 5` whenever it happened to be defined earlier.
        keys = sorted(self.abbrs, key=len, reverse=True)
        self.RE = re.compile(rf"\b(?:{'|'.join(re.escape(key) for key in keys)})\b")
        # Step through tree and modify on matches
        self.iter_element(root)
        return None
98
+
99
+
100
+ class AbbrBlockprocessor (BlockProcessor ):
101
+ """ Parse text for abbreviation references. """
43
102
44
103
RE = re .compile (r'^[*]\[(?P<abbr>[^\\]*?)\][ ]?:[ ]*\n?[ ]*(?P<title>.*)$' , re .MULTILINE )
45
104
105
+ def __init__ (self , parser : BlockParser , abbrs : dict ):
106
+ self .abbrs : dict = abbrs
107
+ super ().__init__ (parser )
108
+
46
109
def test (self , parent : etree .Element , block : str ) -> bool :
47
110
return True
48
111
49
112
def run (self , parent : etree .Element , blocks : list [str ]) -> bool :
50
113
"""
51
- Find and remove all Abbreviation references from the text.
52
- Each reference is set as a new `AbbrPattern` in the markdown instance .
114
+ Find and remove all abbreviation references from the text.
115
+ Each reference is added to the abbreviation collection .
53
116
54
117
"""
55
118
block = blocks .pop (0 )
56
119
m = self .RE .search (block )
57
120
if m :
58
121
abbr = m .group ('abbr' ).strip ()
59
122
title = m .group ('title' ).strip ()
60
- self .parser .md .inlinePatterns .register (
61
- AbbrInlineProcessor (self ._generate_pattern (abbr ), title ), 'abbr-%s' % abbr , 2
62
- )
123
+ self .abbrs [abbr ] = title
63
124
if block [m .end ():].strip ():
64
125
# Add any content after match back to blocks as separate block
65
126
blocks .insert (0 , block [m .end ():].lstrip ('\n ' ))
@@ -71,11 +132,11 @@ def run(self, parent: etree.Element, blocks: list[str]) -> bool:
71
132
blocks .insert (0 , block )
72
133
return False
73
134
74
- def _generate_pattern (self , text : str ) -> str :
75
- """ Given a string, returns a regex pattern to match that string. """
76
- return f"(?P<abbr>\\ b{ re .escape (text ) } \\ b)"
135
+
136
# Backward-compatibility alias: `AbbrBlockprocessor` used to be named
# `AbbrPreprocessor`. The old name stays importable, wrapped by `deprecated`
# (presumably warning on use -- confirm against `util.deprecated`).
AbbrPreprocessor = deprecated("This class has been renamed to `AbbrBlockprocessor`.")(AbbrBlockprocessor)
77
137
78
138
139
+ @deprecated ("This class will be removed in the future; use `AbbrTreeprocessor` instead." )
79
140
class AbbrInlineProcessor (InlineProcessor ):
80
141
""" Abbreviation inline pattern. """
81
142
0 commit comments