Skip to content

Commit 792e2d3

Browse files
committed
Adopt PythonMethod TRegex option for search, match and fullmatch support
1 parent 3b8e7dc commit 792e2d3

File tree

1 file changed

+18
-19
lines changed

1 file changed

+18
-19
lines changed

graalpython/lib-graalpython/_sre.py

Lines changed: 18 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -349,12 +349,11 @@ def __check_input_type(self, input):
349349
if self.__binary and isinstance(input, str):
350350
raise TypeError("cannot use a bytes pattern on a string-like object")
351351

352-
def __tregex_compile(self, other_pattern=None, extra_flags="", extra_options=""):
353-
pattern = other_pattern or self.pattern
354-
flags = self.__flags_str + extra_flags
355-
if (pattern, flags, extra_options) not in self.__compiled_regexes:
352+
def __tregex_compile(self, method="search", must_advance=False):
353+
if (method, must_advance) not in self.__compiled_regexes:
356354
try:
357-
self.__compiled_regexes[(pattern, flags, extra_options)] = tregex_compile_internal(pattern, flags, extra_options, fallback_compiler)
355+
extra_options = f"PythonMethod={method},MustAdvance={'true' if must_advance else 'false'}"
356+
self.__compiled_regexes[(method, must_advance)] = tregex_compile_internal(self.pattern, self.__flags_str, extra_options, fallback_compiler)
358357
except ValueError as e:
359358
if len(e.args) == 2:
360359
msg = e.args[0]
@@ -367,7 +366,7 @@ def __tregex_compile(self, other_pattern=None, extra_flags="", extra_options="")
367366
raise ValueError(msg) from None
368367
raise error(msg, self.pattern, e.args[1]) from None
369368
raise
370-
return self.__compiled_regexes[(pattern, flags, extra_options)]
369+
return self.__compiled_regexes[(method, must_advance)]
371370

372371
def __repr__(self):
373372
flags = self.flags
@@ -405,25 +404,25 @@ def __copy__(self):
405404
def __deepcopy__(self, memo):
406405
return self
407406

408-
def _search(self, string, pos, endpos, other_pattern=None, sticky=False, must_advance=False):
407+
def _search(self, string, pos, endpos, method="search", must_advance=False):
409408
_check_pos(pos)
410409
self.__check_input_type(string)
411410
substring, pos, endpos = _normalize_bounds(string, pos, endpos)
412-
compiled_regex = self.__tregex_compile(other_pattern=other_pattern, extra_flags="y" if sticky else "", extra_options="MustAdvance=true" if must_advance else "")
411+
compiled_regex = self.__tregex_compile(method=method, must_advance=must_advance)
413412
result = tregex_call_exec(compiled_regex.exec, substring, pos)
414413
if result.isMatch:
415414
return Match(self, pos, endpos, result, string, self.__indexgroup)
416415
else:
417416
return None
418417

419418
def search(self, string, pos=0, endpos=maxsize):
420-
return self._search(string, pos, endpos)
419+
return self._search(string, pos, endpos, method="search")
421420

422421
def match(self, string, pos=0, endpos=maxsize):
423-
return self._search(string, pos, endpos, sticky=True)
422+
return self._search(string, pos, endpos, method="match")
424423

425424
def fullmatch(self, string, pos=0, endpos=maxsize):
426-
return self._search(string, pos, endpos, sticky=True, other_pattern=_append_end_assert(self.pattern))
425+
return self._search(string, pos, endpos, method="fullmatch")
427426

428427
def __sanitize_out_type(self, elem):
429428
"""Helper function for findall and split. Ensures that the type of the elements of the
@@ -444,7 +443,7 @@ def finditer(self, string, pos=0, endpos=maxsize):
444443
def __finditer_gen(self, string, substring, pos, endpos):
445444
must_advance = False
446445
while pos <= endpos:
447-
compiled_regex = self.__tregex_compile(extra_options = "MustAdvance=true" if must_advance else "")
446+
compiled_regex = self.__tregex_compile(must_advance=must_advance)
448447
result = tregex_call_exec(compiled_regex.exec, substring, pos)
449448
if not result.isMatch:
450449
break
@@ -462,7 +461,7 @@ def findall(self, string, pos=0, endpos=maxsize):
462461
group_count = self.__tregex_compile().groupCount
463462
must_advance = False
464463
while pos <= endpos:
465-
compiled_regex = self.__tregex_compile(extra_options = "MustAdvance=true" if must_advance else "")
464+
compiled_regex = self.__tregex_compile(must_advance=must_advance)
466465
result = tregex_call_exec(compiled_regex.exec, substring, pos)
467466
if not result.isMatch:
468467
break
@@ -499,7 +498,7 @@ def subn(self, repl, string, count=0):
499498
literal = True
500499

501500
while (count == 0 or n < count) and pos <= len(string):
502-
compiled_regex = self.__tregex_compile(extra_options = "MustAdvance=true" if must_advance else "")
501+
compiled_regex = self.__tregex_compile(must_advance=must_advance)
503502
match_result = tregex_call_exec(compiled_regex.exec, string, pos)
504503
if not match_result.isMatch:
505504
break
@@ -529,7 +528,7 @@ def split(self, string, maxsplit=0):
529528
search_pos = 0
530529
must_advance = False
531530
while (maxsplit == 0 or n < maxsplit) and search_pos <= len(string):
532-
compiled_regex = self.__tregex_compile(extra_options = "MustAdvance=true" if must_advance else "")
531+
compiled_regex = self.__tregex_compile(must_advance=must_advance)
533532
match_result = tregex_call_exec(compiled_regex.exec, string, search_pos)
534533
if not match_result.isMatch:
535534
break
@@ -562,10 +561,10 @@ def __init__(self, pattern, string, start, end):
562561
self._end = end
563562
self._must_advance = False
564563

565-
def _match_search(self, sticky):
564+
def _match_search(self, method):
566565
if self._start > len(self._string):
567566
return None
568-
match = self.pattern._search(self._string, self._start, self._end, sticky = sticky, must_advance = self._must_advance)
567+
match = self.pattern._search(self._string, self._start, self._end, method=method, must_advance=self._must_advance)
569568
if match is None:
570569
self._start += 1
571570
else:
@@ -574,10 +573,10 @@ def _match_search(self, sticky):
574573
return match
575574

576575
def match(self):
577-
return self._match_search(True)
576+
return self._match_search("match")
578577

579578
def search(self):
580-
return self._match_search(False)
579+
return self._match_search("search")
581580

582581

583582
_t_compile = Pattern

0 commit comments

Comments
 (0)