4
4
5
5
from __future__ import annotations
6
6
7
- from io import StringIO
7
+ from io import (
8
+ BytesIO ,
9
+ StringIO ,
10
+ )
8
11
from keyword import iskeyword
9
12
import token
10
13
import tokenize
@@ -58,7 +61,7 @@ def create_valid_python_identifier(name: str) -> str:
58
61
"'" : "_SINGLEQUOTE_" ,
59
62
'"' : "_DOUBLEQUOTE_" ,
60
63
# Currently not possible. Terminates parser and won't find backtick.
61
- # "#": "_HASH_",
64
+ "#" : "_HASH_" ,
62
65
}
63
66
)
64
67
@@ -168,6 +171,69 @@ def tokenize_backtick_quoted_string(
168
171
return BACKTICK_QUOTED_STRING , source [string_start :string_end ]
169
172
170
173
174
def split_by_backtick(s: str) -> list[tuple[bool, str]]:
    """
    Split an expression string into backtick-quoted and ordinary segments.

    Quoted string literals (single- or double-quoted) are treated as opaque
    text: a backtick inside a string literal does not open a quoted name.

    Parameters
    ----------
    s : str
        The expression, possibly containing backtick-quoted names and
        single-/double-quoted string literals.

    Returns
    -------
    list[tuple[bool, str]]
        Ordered substrings covering ``s``. The flag is True when the
        substring is a backtick-quoted name (backticks included),
        False otherwise.
    """
    substrings = []
    # Accumulates plain (non-backtick) text across loop iterations until it
    # can be flushed into `substrings`.
    substring = ""
    i = 0
    while i < len(s):
        backtick_index = s.find("`", i)

        # No backticks: the rest of the input is plain text.
        if backtick_index == -1:
            substrings.append((False, substring + s[i:]))
            break

        # Locate the earliest quote character (of either kind) at/after i.
        single_quote_index = s.find("'", i)
        double_quote_index = s.find('"', i)
        if (single_quote_index == -1) and (double_quote_index == -1):
            quote_index = -1
        elif single_quote_index == -1:
            quote_index = double_quote_index
        elif double_quote_index == -1:
            quote_index = single_quote_index
        else:
            quote_index = min(single_quote_index, double_quote_index)

        # No quotes
        if quote_index == -1:
            next_backtick_index = s.find("`", backtick_index + 1)
        # Backtick opened before quote
        elif backtick_index < quote_index:
            next_backtick_index = s.find("`", backtick_index + 1)
        # Quote opened before backtick: ask the Python tokenizer where the
        # string literal ends, so escaped quotes inside it are respected.
        else:
            next_quote_index = -1
            line_reader = BytesIO(s[i:].encode("utf-8")).readline
            token_generator = tokenize.tokenize(line_reader)
            for toknum, _, (_, _), (_, end), _ in token_generator:
                if toknum == tokenize.STRING:
                    # `end` is the exclusive end column of the STRING token
                    # within s[i:]; convert it to an index into s (position
                    # of the literal's closing quote character).
                    next_quote_index = i + end - 1
                    break

            # Quote is unmatched (no STRING token found): fall through and
            # treat the backtick as the significant delimiter.
            if next_quote_index == -1:
                next_backtick_index = s.find("`", backtick_index + 1)
            # Quote is matched: the literal's contents are plain text, so
            # skip past them and rescan from there. NOTE(review): i is left
            # pointing AT the closing quote; the next iteration classifies
            # that lone quote as "unmatched" and advances past it.
            else:
                substring += s[i:next_quote_index]
                i = next_quote_index
                continue

        # Backtick is unmatched (bad syntax): emit the remainder as plain
        # text and stop.
        if next_backtick_index == -1:
            substrings.append((False, substring + s[i:]))
            break
        # Backtick is matched: flush any plain text before it, then emit the
        # backtick-quoted span (delimiters included).
        else:
            if i != backtick_index:
                substrings.append((False, substring + s[i:backtick_index]))
            substrings.append((True, s[backtick_index : next_backtick_index + 1]))
            substring = ""
            i = next_backtick_index + 1

    return substrings
235
+
236
+
171
237
def tokenize_string (source : str ) -> Iterator [tuple [int , str ]]:
172
238
"""
173
239
Tokenize a Python source code string.
@@ -182,6 +248,16 @@ def tokenize_string(source: str) -> Iterator[tuple[int, str]]:
182
248
tok_generator : Iterator[Tuple[int, str]]
183
249
An iterator yielding all tokens with only toknum and tokval (Tuple[ing, str]).
184
250
"""
251
+ # GH 59285
252
+ source = "" .join (
253
+ (
254
+ f"`{ create_valid_python_identifier (substring [1 :- 1 ])} `"
255
+ if is_backticked
256
+ else substring
257
+ )
258
+ for is_backticked , substring in split_by_backtick (source )
259
+ )
260
+
185
261
line_reader = StringIO (source ).readline
186
262
token_generator = tokenize .generate_tokens (line_reader )
187
263
0 commit comments