1
- import html
2
1
import re
3
2
from typing import Callable , Optional
4
3
from urllib .parse import urlparse , urlunparse , quote , unquote # noqa: F401
5
4
6
5
import mdurl
7
6
8
- from .utils import ESCAPABLE
9
7
from .. import _punycode
10
8
11
9
12
- # ################# Copied from Commonmark.py #################
13
-
14
# Patterns used to resolve backslash escapes and HTML entity references.

# An entity reference: hexadecimal numeric (``&#x..;``), decimal numeric
# (``&#..;``) or named (``&name;``).
ENTITY = "&(?:#x[a-f0-9]{1,6}|#[0-9]{1,7}|[a-z][a-z0-9]{1,31});"

# Cheap pre-check: a string containing neither a backslash nor an ampersand
# has nothing to unescape.
reBackslashOrAmp = re.compile(r"[\\&]")

# One literal backslash followed by a character from the ESCAPABLE set, or an
# entity reference.  The raw string r"\\" is the regex for a single literal
# backslash; ESCAPABLE is spliced in as the body of a character class.
reEntityOrEscapedChar = re.compile(
    r"\\" + "[" + ESCAPABLE + "]|" + ENTITY, re.IGNORECASE
)
19
-
20
-
21
def unescape_char(s: str) -> str:
    """Resolve one backslash escape or HTML entity to its literal text.

    :param s: a backslash followed by the escaped character, or an HTML
        entity reference such as ``&amp;`` or ``&#35;``.
    :return: the character following the backslash when *s* starts with one;
        otherwise the result of ``html.unescape(s)``.
    """
    # Compare against a single backslash character.  Requiring a second
    # character means an empty string or a lone backslash falls through and
    # is returned unchanged instead of raising IndexError.
    if len(s) > 1 and s[0] == "\\":
        return s[1]
    return html.unescape(s)
26
-
27
-
28
def unescape_string(s: str) -> str:
    """Return *s* with entities and backslash escapes replaced by literals."""
    # Fast path: without a backslash or ampersand there is nothing to replace.
    if reBackslashOrAmp.search(s) is None:
        return s
    # Each escape/entity match is resolved individually by unescape_char.
    return reEntityOrEscapedChar.sub(
        lambda match: unescape_char(match.group()), s
    )
34
-
35
-
36
def normalize_uri(uri: str) -> str:
    """Percent-encode *uri* while leaving common URI delimiters untouched.

    The characters ``/@:+?=&()%#*,`` are passed to ``quote`` as safe, so they
    are never encoded; because ``%`` is among them, percent-escapes already
    present in *uri* are not encoded a second time.
    """
    keep_unencoded = "/@:+?=&()%#*,"
    return quote(uri, safe=keep_unencoded)
38
-
39
-
40
- ##################
41
-
42
-
43
10
# URL scheme prefixes, each including the trailing colon.
# NOTE(review): presumably the schemes whose hostname is re-encoded by the
# normalize functions in this module -- their bodies are not visible here, confirm.
RECODE_HOSTNAME_FOR = ("http:" , "https:" , "mailto:" )
44
11
45
12
46
def unescape_normalize_uri(x: str) -> str:
    """Unescape *x* with ``unescape_string``, then apply ``normalize_uri``."""
    unescaped = unescape_string(x)
    return normalize_uri(unescaped)
48
-
49
-
50
13
def normalizeLink (url : str ) -> str :
51
14
"""Normalize destination URLs in links
52
15
@@ -73,10 +36,6 @@ def normalizeLink(url: str) -> str:
73
36
return mdurl .encode (mdurl .format (parsed ))
74
37
75
38
76
def unescape_unquote(x: str) -> str:
    """Unescape *x* with ``unescape_string``, then percent-decode the result."""
    unescaped = unescape_string(x)
    return unquote(unescaped)
78
-
79
-
80
39
def normalizeLinkText (url : str ) -> str :
81
40
"""Normalize autolink content
82
41
0 commit comments