Skip to content

Commit ba88fc3

Browse files
Simplify string tokenization regexes (#4331)
1 parent 5683242 commit ba88fc3

File tree

3 files changed: 18 additions and 12 deletions.

CHANGES.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@
3030

3131
<!-- Changes that improve Black's performance. -->
3232

33+
- Fix bad performance on certain complex string literals (#4331)
34+
3335
### Output
3436

3537
<!-- Changes to Black's terminal output and error messages -->

src/blib2to3/pgen2/tokenize.py

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -119,13 +119,13 @@ def _combinations(*l: str) -> Set[str]:
119119
Number = group(Imagnumber, Floatnumber, Intnumber)
120120

121121
# Tail end of ' string.
122-
Single = r"[^'\\]*(?:\\.[^'\\]*)*'"
122+
Single = r"(?:\\.|[^'\\])*'"
123123
# Tail end of " string.
124-
Double = r'[^"\\]*(?:\\.[^"\\]*)*"'
124+
Double = r'(?:\\.|[^"\\])*"'
125125
# Tail end of ''' string.
126-
Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''"
126+
Single3 = r"(?:\\.|'(?!'')|[^'\\])*'''"
127127
# Tail end of """ string.
128-
Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""'
128+
Double3 = r'(?:\\.|"(?!"")|[^"\\])*"""'
129129
_litprefix = r"(?:[uUrRbB]|[rR][bB]|[bBuU][rR])?"
130130
_fstringlitprefix = r"(?:rF|FR|Fr|fr|RF|F|rf|f|Rf|fR)"
131131
Triple = group(
@@ -136,12 +136,12 @@ def _combinations(*l: str) -> Set[str]:
136136
)
137137

138138
# Beginning of a single-quoted (i.e. not triple-quoted) f-string. The match must end at a lone `{`, never one that is part of `{{` or `\N{`.
139-
SingleLbrace = r"[^'\\{]*(?:(?:\\N{|\\.|{{)[^'\\{]*)*(?<!\\N){(?!{)"
140-
DoubleLbrace = r'[^"\\{]*(?:(?:\\N{|\\.|{{)[^"\\{]*)*(?<!\\N){(?!{)'
139+
SingleLbrace = r"(?:\\N{|\\.|{{|[^'\\{])*(?<!\\N){(?!{)"
140+
DoubleLbrace = r'(?:\\N{|\\.|{{|[^"\\{])*(?<!\\N){(?!{)'
141141

142142
# Beginning of a triple-quoted f-string. The match must end at a lone `{`, never one that is part of `{{` or `\N{`.
143-
Single3Lbrace = r"[^'{]*(?:(?:\\N{|\\[^{]|{{|'(?!''))[^'{]*)*(?<!\\N){(?!{)"
144-
Double3Lbrace = r'[^"{]*(?:(?:\\N{|\\[^{]|{{|"(?!""))[^"{]*)*(?<!\\N){(?!{)'
143+
Single3Lbrace = r"(?:\\N{|\\[^{]|{{|'(?!'')|[^'{\\])*(?<!\\N){(?!{)"
144+
Double3Lbrace = r'(?:\\N{|\\[^{]|{{|"(?!"")|[^"{\\])*(?<!\\N){(?!{)'
145145

146146
# ! format specifier inside an fstring brace, ensure it's not a `!=` token
147147
Bang = Whitespace + group("!") + r"(?!=)"
@@ -171,12 +171,12 @@ def _combinations(*l: str) -> Set[str]:
171171
Special = group(r"\r?\n", r"[:;.,`@]")
172172
Funny = group(Operator, Bracket, Special)
173173

174-
_string_middle_single = r"[^\n'\\]*(?:\\.[^\n'\\]*)*"
175-
_string_middle_double = r'[^\n"\\]*(?:\\.[^\n"\\]*)*'
174+
_string_middle_single = r"(?:[^\n'\\]|\\.)*"
175+
_string_middle_double = r'(?:[^\n"\\]|\\.)*'
176176

177177
# FSTRING_MIDDLE followed by LBRACE: the captured `{` must be a lone brace, not part of `{{` or `\N{`.
178-
_fstring_middle_single = r"[^\n'{]*(?:(?:\\N{|\\[^{]|{{)[^\n'{]*)*(?<!\\N)({)(?!{)"
179-
_fstring_middle_double = r'[^\n"{]*(?:(?:\\N{|\\[^{]|{{)[^\n"{]*)*(?<!\\N)({)(?!{)'
178+
_fstring_middle_single = r"(?:\\N{|\\[^{]|{{|[^\n'{\\])*(?<!\\N)({)(?!{)"
179+
_fstring_middle_double = r'(?:\\N{|\\[^{]|{{|[^\n"{\\])*(?<!\\N)({)(?!{)'
180180

181181
# First (or only) line of ' or " string.
182182
ContStr = group(

tests/data/cases/pep_701.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,8 @@
119119
level=0,
120120
)
121121

122+
f'{{\\"kind\\":\\"ConfigMap\\",\\"metadata\\":{{\\"annotations\\":{{}},\\"name\\":\\"cluster-info\\",\\"namespace\\":\\"amazon-cloudwatch\\"}}}}'
123+
122124
# output
123125

124126
x = f"foo"
@@ -240,3 +242,5 @@
240242
f"{self.writer._transport.get_extra_info('peername')}", # type: ignore[attr-defined]
241243
level=0,
242244
)
245+
246+
f'{{\\"kind\\":\\"ConfigMap\\",\\"metadata\\":{{\\"annotations\\":{{}},\\"name\\":\\"cluster-info\\",\\"namespace\\":\\"amazon-cloudwatch\\"}}}}'

0 commit comments

Comments
 (0)